src/coding.c

   1 /* Coding system handler (conversion, detection, etc).
   2    Copyright (C) 2001, 2002, 2003, 2004, 2005,
   3                  2006 Free Software Foundation, Inc.
   4    Copyright (C) 1995, 1997, 1998, 2002, 2003, 2004, 2005
   5      National Institute of Advanced Industrial Science and Technology (AIST)
   6      Registration Number H14PRO021
   7    Copyright (C) 2003
   8      National Institute of Advanced Industrial Science and Technology (AIST)
   9      Registration Number H13PRO009
  10
  11 This file is part of GNU Emacs.
  12
  13 GNU Emacs is free software; you can redistribute it and/or modify
  14 it under the terms of the GNU General Public License as published by
  15 the Free Software Foundation; either version 2, or (at your option)
  16 any later version.
  17
  18 GNU Emacs is distributed in the hope that it will be useful,
  19 but WITHOUT ANY WARRANTY; without even the implied warranty of
  20 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  21 GNU General Public License for more details.
  22
  23 You should have received a copy of the GNU General Public License
  24 along with GNU Emacs; see the file COPYING.  If not, write to
  25 the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
  26 Boston, MA 02110-1301, USA.  */
  27
  28 /*** TABLE OF CONTENTS ***
  29
  30   0. General comments
  31   1. Preamble
  32   2. Emacs' internal format (emacs-utf-8) handlers
  33   3. UTF-8 handlers
  34   4. UTF-16 handlers
  35   5. Charset-base coding systems handlers
  36   6. emacs-mule (old Emacs' internal format) handlers
  37   7. ISO2022 handlers
  38   8. Shift-JIS and BIG5 handlers
  39   9. CCL handlers
  40   10. C library functions
  41   11. Emacs Lisp library functions
  42   12. Postamble
  43
  44 */
  45
  46 /*** 0. General comments ***
  47
  48
  49 CODING SYSTEM
  50
  51   A coding system is an object for an encoding mechanism that contains
  52   information about how to convert byte sequences to character
  53   sequences and vice versa.  When we say "decode", it means converting
  54   a byte sequence of a specific coding system into a character
  55   sequence that is represented by Emacs' internal coding system
  56   `emacs-utf-8', and when we say "encode", it means converting a
  57   character sequence of emacs-utf-8 to a byte sequence of a specific
  58   coding system.
  59
  60   In Emacs Lisp, a coding system is represented by a Lisp symbol.  In
  61   C level, a coding system is represented by a vector of attributes
  62   stored in the hash table Vcharset_hash_table.  The conversion from
  63   coding system symbol to attributes vector is done by looking up
  64   Vcharset_hash_table by the symbol.
  65
  66   Coding systems are classified into the following types depending on
  67   the encoding mechanism.  Here's a brief description of the types.
  68
  69   o UTF-8
  70
  71   o UTF-16
  72
  73   o Charset-base coding system
  74
  75   A coding system defined by one or more (coded) character sets.
  76   Decoding and encoding are done by a code converter defined for each
  77   character set.
  78
  79   o Old Emacs internal format (emacs-mule)
  80
  81   The coding system adopted by old versions of Emacs (20 and 21).
  82
  83   o ISO2022-base coding system
  84
  85   The most famous coding system for multiple character sets.  X's
  86   Compound Text, various EUCs (Extended Unix Code), and coding systems
  87   used in the Internet communication such as ISO-2022-JP are all
  88   variants of ISO2022.
  89
  90   o SJIS (or Shift-JIS or MS-Kanji-Code)
  91
  92   A coding system to encode character sets: ASCII, JISX0201, and
  93   JISX0208.  Widely used for PC's in Japan.  Details are described in
  94   section 8.
  95
  96   o BIG5
  97
  98   A coding system to encode character sets: ASCII and Big5.  Widely
  99   used for Chinese (mainly in Taiwan and Hong Kong).  Details are
 100   described in section 8.  In this file, when we write "big5" (all
 101   lowercase), we mean the coding system, and when we write "Big5"
 102   (capitalized), we mean the character set.
 103
 104   o CCL
 105
 106   If a user wants to decode/encode text encoded in a coding system
 107   not listed above, he can supply a decoder and an encoder for it in
 108   CCL (Code Conversion Language) programs.  Emacs executes the CCL
 109   program while decoding/encoding.
 110
 111   o Raw-text
 112
 113   A coding system for text containing raw eight-bit data.  Emacs
 114   treats each byte of source text as a character (except for
 115   end-of-line conversion).
 116
 117   o No-conversion
 118
 119   Like raw text, but don't do end-of-line conversion.
 120
 121
 122 END-OF-LINE FORMAT
 123
 124   How text end-of-line is encoded depends on operating system.  For
 125   instance, Unix's format is just one byte of LF (line-feed) code,
 126   whereas DOS's format is two-byte sequence of `carriage-return' and
 127   `line-feed' codes.  MacOS's format is usually one byte of
 128   `carriage-return'.
 129
 130   Since text character encoding and end-of-line encoding are
 131   independent, any coding system described above can take any format
 132   of end-of-line (except for no-conversion).
 133
 134 STRUCT CODING_SYSTEM
 135
 136   Before using a coding system for code conversion (i.e. decoding and
 137   encoding), we setup a structure of type `struct coding_system'.
 138   This structure keeps various information about a specific code
 139   conversion (e.g. the location of source and destination data).
 140
 141 */
 142
 143 /* COMMON MACROS */
 144
 145
 146 /*** GENERAL NOTES on `detect_coding_XXX ()' functions ***
 147
 148   These functions check if a byte sequence specified as a source in
 149   CODING conforms to the format of XXX, and update the members of
 150   DETECT_INFO.
 151
 152   Return 1 if the byte sequence conforms to XXX, otherwise return 0.
 153
 154   Below is the template of these functions.  */
 155
 156 #if 0
 157 static int
 158 detect_coding_XXX (coding, detect_info)
 159      struct coding_system *coding;
 160      struct coding_detection_info *detect_info;
 161 {
 162   const unsigned char *src = coding->source;
 163   const unsigned char *src_end = coding->source + coding->src_bytes;
 164   int multibytep = coding->src_multibyte;
 165   int consumed_chars = 0;
 166   int found = 0;
 167   ...;
 168
 169   while (1)
 170     {
 171       /* Get one byte from the source.  If the souce is exausted, jump
 172          to no_more_source:.  */
 173       ONE_MORE_BYTE (c);
 174
 175       if (! __C_conforms_to_XXX___ (c))
 176         break;
 177       if (! __C_strongly_suggests_XXX__ (c))
 178         found = CATEGORY_MASK_XXX;
 179     }
 180   /* The byte sequence is invalid for XXX.  */
 181   detect_info->rejected |= CATEGORY_MASK_XXX;
 182   return 0;
 183
 184  no_more_source:
 185   /* The source exausted successfully.  */
 186   detect_info->found |= found;
 187   return 1;
 188 }
 189 #endif
 190
 191 /*** GENERAL NOTES on `decode_coding_XXX ()' functions ***
 192
 193   These functions decode a byte sequence specified as a source by
 194   CODING.  The resulting multibyte text goes to a place pointed to by
 195   CODING->charbuf, the length of which should not exceed
 196   CODING->charbuf_size;
 197
 198   These functions set the information of original and decoded texts in
 199   CODING->consumed, CODING->consumed_char, and CODING->charbuf_used.
 200   They also set CODING->result to one of CODING_RESULT_XXX indicating
 201   how the decoding is finished.
 202
 203   Below is the template of these functions.  */
 204
 205 #if 0
 206 static void
 207 decode_coding_XXXX (coding)
 208      struct coding_system *coding;
 209 {
 210   const unsigned char *src = coding->source + coding->consumed;
 211   const unsigned char *src_end = coding->source + coding->src_bytes;
 212   /* SRC_BASE remembers the start position in source in each loop.
 213      The loop will be exited when there's not enough source code, or
 214      when there's no room in CHARBUF for a decoded character.  */
 215   const unsigned char *src_base;
 216   /* A buffer to produce decoded characters.  */
 217   int *charbuf = coding->charbuf + coding->charbuf_used;
 218   int *charbuf_end = coding->charbuf + coding->charbuf_size;
 219   int multibytep = coding->src_multibyte;
 220
 221   while (1)
 222     {
 223       src_base = src;
 224       if (charbuf < charbuf_end)
 225         /* No more room to produce a decoded character.  */
 226         break;
 227       ONE_MORE_BYTE (c);
 228       /* Decode it. */
 229     }
 230
 231  no_more_source:
 232   if (src_base < src_end
 233       && coding->mode & CODING_MODE_LAST_BLOCK)
 234     /* If the source ends by partial bytes to construct a character,
 235        treat them as eight-bit raw data.  */
 236     while (src_base < src_end && charbuf < charbuf_end)
 237       *charbuf++ = *src_base++;
 238   /* Remember how many bytes and characters we consumed.  If the
 239      source is multibyte, the bytes and chars are not identical.  */
 240   coding->consumed = coding->consumed_char = src_base - coding->source;
 241   /* Remember how many characters we produced.  */
 242   coding->charbuf_used = charbuf - coding->charbuf;
 243 }
 244 #endif
 245
 246 /*** GENERAL NOTES on `encode_coding_XXX ()' functions ***
 247
 248   These functions encode SRC_BYTES length text at SOURCE of Emacs'
 249   internal multibyte format by CODING.  The resulting byte sequence
 250   goes to a place pointed to by DESTINATION, the length of which
 251   should not exceed DST_BYTES.
 252
 253   These functions set the information of original and encoded texts in
 254   the members produced, produced_char, consumed, and consumed_char of
 255   the structure *CODING.  They also set the member result to one of
 256   CODING_RESULT_XXX indicating how the encoding finished.
 257
 258   DST_BYTES zero means that source area and destination area are
 259   overlapped, which means that we can produce a encoded text until it
 260   reaches at the head of not-yet-encoded source text.
 261
 262   Below is a template of these functions.  */
 263 #if 0
 264 static void
 265 encode_coding_XXX (coding)
 266      struct coding_system *coding;
 267 {
 268   int multibytep = coding->dst_multibyte;
 269   int *charbuf = coding->charbuf;
 270   int *charbuf_end = charbuf->charbuf + coding->charbuf_used;
 271   unsigned char *dst = coding->destination + coding->produced;
 272   unsigned char *dst_end = coding->destination + coding->dst_bytes;
 273   unsigned char *adjusted_dst_end = dst_end - _MAX_BYTES_PRODUCED_IN_LOOP_;
 274   int produced_chars = 0;
 275
 276   for (; charbuf < charbuf_end && dst < adjusted_dst_end; charbuf++)
 277     {
 278       int c = *charbuf;
 279       /* Encode C into DST, and increment DST.  */
 280     }
 281  label_no_more_destination:
 282   /* How many chars and bytes we produced.  */
 283   coding->produced_char += produced_chars;
 284   coding->produced = dst - coding->destination;
 285 }
 286 #endif
 287
 288 \f
 289 /*** 1. Preamble ***/
 290
 291 #include <config.h>
 292 #include <stdio.h>
 293
 294 #include "lisp.h"
 295 #include "buffer.h"
 296 #include "character.h"
 297 #include "charset.h"
 298 #include "ccl.h"
 299 #include "composite.h"
 300 #include "coding.h"
 301 #include "window.h"
 302
 303 Lisp_Object Vcoding_system_hash_table;
 304
 305 Lisp_Object Qcoding_system, Qcoding_aliases, Qeol_type;
 306 Lisp_Object Qunix, Qdos;
 307 extern Lisp_Object Qmac;        /* frame.c */
 308 Lisp_Object Qbuffer_file_coding_system;
 309 Lisp_Object Qpost_read_conversion, Qpre_write_conversion;
 310 Lisp_Object Qdefault_char;
 311 Lisp_Object Qno_conversion, Qundecided;
 312 Lisp_Object Qcharset, Qiso_2022, Qutf_8, Qutf_16, Qshift_jis, Qbig5;
 313 Lisp_Object Qbig, Qlittle;
 314 Lisp_Object Qcoding_system_history;
 315 Lisp_Object Qvalid_codes;
 316 Lisp_Object QCcategory, QCmnemonic, QCdefalut_char;
 317 Lisp_Object QCdecode_translation_table, QCencode_translation_table;
 318 Lisp_Object QCpost_read_conversion, QCpre_write_conversion;
 319 Lisp_Object QCascii_compatible_p;
 320
 321 extern Lisp_Object Qinsert_file_contents, Qwrite_region;
 322 Lisp_Object Qcall_process, Qcall_process_region;
 323 Lisp_Object Qstart_process, Qopen_network_stream;
 324 Lisp_Object Qtarget_idx;
 325
 326 Lisp_Object Qinsufficient_source, Qinconsistent_eol, Qinvalid_source;
 327 Lisp_Object Qinterrupted, Qinsufficient_memory;
 328
 329 /* If a symbol has this property, evaluate the value to define the
 330    symbol as a coding system.  */
 331 static Lisp_Object Qcoding_system_define_form;
 332
 333 int coding_system_require_warning;
 334
 335 Lisp_Object Vselect_safe_coding_system_function;
 336
 337 /* Mnemonic string for each format of end-of-line.  */
 338 Lisp_Object eol_mnemonic_unix, eol_mnemonic_dos, eol_mnemonic_mac;
 339 /* Mnemonic string to indicate format of end-of-line is not yet
 340    decided.  */
 341 Lisp_Object eol_mnemonic_undecided;
 342
 343 /* Format of end-of-line decided by system.  This is Qunix on
 344    Unix and Mac, Qdos on DOS/Windows.
 345    This has an effect only for external encoding (i.e. for output to
 346    file and process), not for in-buffer or Lisp string encoding.  */
 347 static Lisp_Object system_eol_type;
 348
 349 #ifdef emacs
 350
 351 Lisp_Object Vcoding_system_list, Vcoding_system_alist;
 352
 353 Lisp_Object Qcoding_system_p, Qcoding_system_error;
 354
 355 /* Coding system emacs-mule and raw-text are for converting only
 356    end-of-line format.  */
 357 Lisp_Object Qemacs_mule, Qraw_text;
 358 Lisp_Object Qutf_8_emacs;
 359
 360 /* Coding-systems are handed between Emacs Lisp programs and C internal
 361    routines by the following three variables.  */
 362 /* Coding-system for reading files and receiving data from process.  */
 363 Lisp_Object Vcoding_system_for_read;
 364 /* Coding-system for writing files and sending data to process.  */
 365 Lisp_Object Vcoding_system_for_write;
 366 /* Coding-system actually used in the latest I/O.  */
 367 Lisp_Object Vlast_coding_system_used;
 368 /* Set to non-nil when an error is detected while code conversion.  */
 369 Lisp_Object Vlast_code_conversion_error;
 370 /* A vector of length 256 which contains information about special
 371    Latin codes (especially for dealing with Microsoft codes).  */
 372 Lisp_Object Vlatin_extra_code_table;
 373
 374 /* Flag to inhibit code conversion of end-of-line format.  */
 375 int inhibit_eol_conversion;
 376
 377 /* Flag to inhibit ISO2022 escape sequence detection.  */
 378 int inhibit_iso_escape_detection;
 379
 380 /* Flag to make buffer-file-coding-system inherit from process-coding.  */
 381 int inherit_process_coding_system;
 382
 383 /* Coding system to be used to encode text for terminal display.  */
 384 struct coding_system terminal_coding;
 385
 386 /* Coding system to be used to encode text for terminal display when
 387    terminal coding system is nil.  */
 388 struct coding_system safe_terminal_coding;
 389
 390 /* Coding system of what is sent from terminal keyboard.  */
 391 struct coding_system keyboard_coding;
 392
 393 Lisp_Object Vfile_coding_system_alist;
 394 Lisp_Object Vprocess_coding_system_alist;
 395 Lisp_Object Vnetwork_coding_system_alist;
 396
 397 Lisp_Object Vlocale_coding_system;
 398
 399 #endif /* emacs */
 400
 401 /* Flag to tell if we look up translation table on character code
 402    conversion.  */
 403 Lisp_Object Venable_character_translation;
 404 /* Standard translation table to look up on decoding (reading).  */
 405 Lisp_Object Vstandard_translation_table_for_decode;
 406 /* Standard translation table to look up on encoding (writing).  */
 407 Lisp_Object Vstandard_translation_table_for_encode;
 408
 409 Lisp_Object Qtranslation_table;
 410 Lisp_Object Qtranslation_table_id;
 411 Lisp_Object Qtranslation_table_for_decode;
 412 Lisp_Object Qtranslation_table_for_encode;
 413
 414 /* Alist of charsets vs revision number.  */
 415 static Lisp_Object Vcharset_revision_table;
 416
 417 /* Default coding systems used for process I/O.  */
 418 Lisp_Object Vdefault_process_coding_system;
 419
 420 /* Char table for translating Quail and self-inserting input.  */
 421 Lisp_Object Vtranslation_table_for_input;
 422
 423 /* Two special coding systems.  */
 424 Lisp_Object Vsjis_coding_system;
 425 Lisp_Object Vbig5_coding_system;
 426
 427 /* ISO2022 section */
 428
 429 #define CODING_ISO_INITIAL(coding, reg)                 \
 430   (XINT (AREF (AREF (CODING_ID_ATTRS ((coding)->id),    \
 431                      coding_attr_iso_initial),          \
 432                reg)))
 433
 434
 435 #define CODING_ISO_REQUEST(coding, charset_id)  \
 436   ((charset_id <= (coding)->max_charset_id      \
 437     ? (coding)->safe_charsets[charset_id]       \
 438     : -1))
 439
 440
 441 #define CODING_ISO_FLAGS(coding)        \
 442   ((coding)->spec.iso_2022.flags)
 443 #define CODING_ISO_DESIGNATION(coding, reg)     \
 444   ((coding)->spec.iso_2022.current_designation[reg])
 445 #define CODING_ISO_INVOCATION(coding, plane)    \
 446   ((coding)->spec.iso_2022.current_invocation[plane])
 447 #define CODING_ISO_SINGLE_SHIFTING(coding)      \
 448   ((coding)->spec.iso_2022.single_shifting)
 449 #define CODING_ISO_BOL(coding)  \
 450   ((coding)->spec.iso_2022.bol)
 451 #define CODING_ISO_INVOKED_CHARSET(coding, plane)       \
 452   CODING_ISO_DESIGNATION ((coding), CODING_ISO_INVOCATION ((coding), (plane)))
 453
 454 /* Control characters of ISO2022.  */
 455                         /* code */      /* function */
 456 #define ISO_CODE_LF     0x0A            /* line-feed */
 457 #define ISO_CODE_CR     0x0D            /* carriage-return */
 458 #define ISO_CODE_SO     0x0E            /* shift-out */
 459 #define ISO_CODE_SI     0x0F            /* shift-in */
 460 #define ISO_CODE_SS2_7  0x19            /* single-shift-2 for 7-bit code */
 461 #define ISO_CODE_ESC    0x1B            /* escape */
 462 #define ISO_CODE_SS2    0x8E            /* single-shift-2 */
 463 #define ISO_CODE_SS3    0x8F            /* single-shift-3 */
 464 #define ISO_CODE_CSI    0x9B            /* control-sequence-introducer */
 465
 466 /* All code (1-byte) of ISO2022 is classified into one of the
 467    followings.  */
 468 enum iso_code_class_type
 469   {
 470     ISO_control_0,              /* Control codes in the range
 471                                    0x00..0x1F and 0x7F, except for the
 472                                    following 5 codes.  */
 473     ISO_shift_out,              /* ISO_CODE_SO (0x0E) */
 474     ISO_shift_in,               /* ISO_CODE_SI (0x0F) */
 475     ISO_single_shift_2_7,       /* ISO_CODE_SS2_7 (0x19) */
 476     ISO_escape,                 /* ISO_CODE_SO (0x1B) */
 477     ISO_control_1,              /* Control codes in the range
 478                                    0x80..0x9F, except for the
 479                                    following 3 codes.  */
 480     ISO_single_shift_2,         /* ISO_CODE_SS2 (0x8E) */
 481     ISO_single_shift_3,         /* ISO_CODE_SS3 (0x8F) */
 482     ISO_control_sequence_introducer, /* ISO_CODE_CSI (0x9B) */
 483     ISO_0x20_or_0x7F,           /* Codes of the values 0x20 or 0x7F.  */
 484     ISO_graphic_plane_0,        /* Graphic codes in the range 0x21..0x7E.  */
 485     ISO_0xA0_or_0xFF,           /* Codes of the values 0xA0 or 0xFF.  */
 486     ISO_graphic_plane_1         /* Graphic codes in the range 0xA1..0xFE.  */
 487   };
 488
 489 /** The macros CODING_ISO_FLAG_XXX defines a flag bit of the
 490     `iso-flags' attribute of an iso2022 coding system.  */
 491
 492 /* If set, produce long-form designation sequence (e.g. ESC $ ( A)
 493    instead of the correct short-form sequence (e.g. ESC $ A).  */
 494 #define CODING_ISO_FLAG_LONG_FORM       0x0001
 495
 496 /* If set, reset graphic planes and registers at end-of-line to the
 497    initial state.  */
 498 #define CODING_ISO_FLAG_RESET_AT_EOL    0x0002
 499
 500 /* If set, reset graphic planes and registers before any control
 501    characters to the initial state.  */
 502 #define CODING_ISO_FLAG_RESET_AT_CNTL   0x0004
 503
 504 /* If set, encode by 7-bit environment.  */
 505 #define CODING_ISO_FLAG_SEVEN_BITS      0x0008
 506
 507 /* If set, use locking-shift function.  */
 508 #define CODING_ISO_FLAG_LOCKING_SHIFT   0x0010
 509
 510 /* If set, use single-shift function.  Overwrite
 511    CODING_ISO_FLAG_LOCKING_SHIFT.  */
 512 #define CODING_ISO_FLAG_SINGLE_SHIFT    0x0020
 513
 514 /* If set, use designation escape sequence.  */
 515 #define CODING_ISO_FLAG_DESIGNATION     0x0040
 516
 517 /* If set, produce revision number sequence.  */
 518 #define CODING_ISO_FLAG_REVISION        0x0080
 519
 520 /* If set, produce ISO6429's direction specifying sequence.  */
 521 #define CODING_ISO_FLAG_DIRECTION       0x0100
 522
 523 /* If set, assume designation states are reset at beginning of line on
 524    output.  */
 525 #define CODING_ISO_FLAG_INIT_AT_BOL     0x0200
 526
 527 /* If set, designation sequence should be placed at beginning of line
 528    on output.  */
 529 #define CODING_ISO_FLAG_DESIGNATE_AT_BOL 0x0400
 530
 531 /* If set, do not encode unsafe charactes on output.  */
 532 #define CODING_ISO_FLAG_SAFE            0x0800
 533
 534 /* If set, extra latin codes (128..159) are accepted as a valid code
 535    on input.  */
 536 #define CODING_ISO_FLAG_LATIN_EXTRA     0x1000
 537
 538 #define CODING_ISO_FLAG_COMPOSITION     0x2000
 539
 540 #define CODING_ISO_FLAG_EUC_TW_SHIFT    0x4000
 541
 542 #define CODING_ISO_FLAG_USE_ROMAN       0x8000
 543
 544 #define CODING_ISO_FLAG_USE_OLDJIS      0x10000
 545
 546 #define CODING_ISO_FLAG_FULL_SUPPORT    0x100000
 547
 548 /* A character to be produced on output if encoding of the original
 549    character is prohibited by CODING_ISO_FLAG_SAFE.  */
 550 #define CODING_INHIBIT_CHARACTER_SUBSTITUTION  '?'
 551
 552
 553 /* UTF-16 section */
 554 #define CODING_UTF_16_BOM(coding)       \
 555   ((coding)->spec.utf_16.bom)
 556
 557 #define CODING_UTF_16_ENDIAN(coding)    \
 558   ((coding)->spec.utf_16.endian)
 559
 560 #define CODING_UTF_16_SURROGATE(coding) \
 561   ((coding)->spec.utf_16.surrogate)
 562
 563
 564 /* CCL section */
 565 #define CODING_CCL_DECODER(coding)      \
 566   AREF (CODING_ID_ATTRS ((coding)->id), coding_attr_ccl_decoder)
 567 #define CODING_CCL_ENCODER(coding)      \
 568   AREF (CODING_ID_ATTRS ((coding)->id), coding_attr_ccl_encoder)
 569 #define CODING_CCL_VALIDS(coding)                                          \
 570   (SDATA (AREF (CODING_ID_ATTRS ((coding)->id), coding_attr_ccl_valids)))
 571
 572 /* Index for each coding category in `coding_categories' */
 573
 574 enum coding_category
 575   {
 576     coding_category_iso_7,
 577     coding_category_iso_7_tight,
 578     coding_category_iso_8_1,
 579     coding_category_iso_8_2,
 580     coding_category_iso_7_else,
 581     coding_category_iso_8_else,
 582     coding_category_utf_8,
 583     coding_category_utf_16_auto,
 584     coding_category_utf_16_be,
 585     coding_category_utf_16_le,
 586     coding_category_utf_16_be_nosig,
 587     coding_category_utf_16_le_nosig,
 588     coding_category_charset,
 589     coding_category_sjis,
 590     coding_category_big5,
 591     coding_category_ccl,
 592     coding_category_emacs_mule,
 593     /* All above are targets of code detection.  */
 594     coding_category_raw_text,
 595     coding_category_undecided,
 596     coding_category_max
 597   };
 598
 599 /* Definitions of flag bits used in detect_coding_XXXX.  */
 600 #define CATEGORY_MASK_ISO_7             (1 << coding_category_iso_7)
 601 #define CATEGORY_MASK_ISO_7_TIGHT       (1 << coding_category_iso_7_tight)
 602 #define CATEGORY_MASK_ISO_8_1           (1 << coding_category_iso_8_1)
 603 #define CATEGORY_MASK_ISO_8_2           (1 << coding_category_iso_8_2)
 604 #define CATEGORY_MASK_ISO_7_ELSE        (1 << coding_category_iso_7_else)
 605 #define CATEGORY_MASK_ISO_8_ELSE        (1 << coding_category_iso_8_else)
 606 #define CATEGORY_MASK_UTF_8             (1 << coding_category_utf_8)
 607 #define CATEGORY_MASK_UTF_16_AUTO       (1 << coding_category_utf_16_auto)
 608 #define CATEGORY_MASK_UTF_16_BE         (1 << coding_category_utf_16_be)
 609 #define CATEGORY_MASK_UTF_16_LE         (1 << coding_category_utf_16_le)
 610 #define CATEGORY_MASK_UTF_16_BE_NOSIG   (1 << coding_category_utf_16_be_nosig)
 611 #define CATEGORY_MASK_UTF_16_LE_NOSIG   (1 << coding_category_utf_16_le_nosig)
 612 #define CATEGORY_MASK_CHARSET           (1 << coding_category_charset)
 613 #define CATEGORY_MASK_SJIS              (1 << coding_category_sjis)
 614 #define CATEGORY_MASK_BIG5              (1 << coding_category_big5)
 615 #define CATEGORY_MASK_CCL               (1 << coding_category_ccl)
 616 #define CATEGORY_MASK_EMACS_MULE        (1 << coding_category_emacs_mule)
 617 #define CATEGORY_MASK_RAW_TEXT          (1 << coding_category_raw_text)
 618
 619 /* This value is returned if detect_coding_mask () find nothing other
 620    than ASCII characters.  */
 621 #define CATEGORY_MASK_ANY               \
 622   (CATEGORY_MASK_ISO_7                  \
 623    | CATEGORY_MASK_ISO_7_TIGHT          \
 624    | CATEGORY_MASK_ISO_8_1              \
 625    | CATEGORY_MASK_ISO_8_2              \
 626    | CATEGORY_MASK_ISO_7_ELSE           \
 627    | CATEGORY_MASK_ISO_8_ELSE           \
 628    | CATEGORY_MASK_UTF_8                \
 629    | CATEGORY_MASK_UTF_16_BE            \
 630    | CATEGORY_MASK_UTF_16_LE            \
 631    | CATEGORY_MASK_UTF_16_BE_NOSIG      \
 632    | CATEGORY_MASK_UTF_16_LE_NOSIG      \
 633    | CATEGORY_MASK_CHARSET              \
 634    | CATEGORY_MASK_SJIS                 \
 635    | CATEGORY_MASK_BIG5                 \
 636    | CATEGORY_MASK_CCL                  \
 637    | CATEGORY_MASK_EMACS_MULE)
 638
 639
 640 #define CATEGORY_MASK_ISO_7BIT \
 641   (CATEGORY_MASK_ISO_7 | CATEGORY_MASK_ISO_7_TIGHT)
 642
 643 #define CATEGORY_MASK_ISO_8BIT \
 644   (CATEGORY_MASK_ISO_8_1 | CATEGORY_MASK_ISO_8_2)
 645
 646 #define CATEGORY_MASK_ISO_ELSE \
 647   (CATEGORY_MASK_ISO_7_ELSE | CATEGORY_MASK_ISO_8_ELSE)
 648
 649 #define CATEGORY_MASK_ISO_ESCAPE        \
 650   (CATEGORY_MASK_ISO_7                  \
 651    | CATEGORY_MASK_ISO_7_TIGHT          \
 652    | CATEGORY_MASK_ISO_7_ELSE           \
 653    | CATEGORY_MASK_ISO_8_ELSE)
 654
 655 #define CATEGORY_MASK_ISO       \
 656   (  CATEGORY_MASK_ISO_7BIT     \
 657      | CATEGORY_MASK_ISO_8BIT   \
 658      | CATEGORY_MASK_ISO_ELSE)
 659
 660 #define CATEGORY_MASK_UTF_16            \
 661   (CATEGORY_MASK_UTF_16_BE              \
 662    | CATEGORY_MASK_UTF_16_LE            \
 663    | CATEGORY_MASK_UTF_16_BE_NOSIG      \
 664    | CATEGORY_MASK_UTF_16_LE_NOSIG)
 665
 666
 667 /* List of symbols `coding-category-xxx' ordered by priority.  This
 668    variable is exposed to Emacs Lisp.  */
 669 static Lisp_Object Vcoding_category_list;
 670
 671 /* Table of coding categories (Lisp symbols).  This variable is for
 672    internal use oly.  */
 673 static Lisp_Object Vcoding_category_table;
 674
 675 /* Table of coding-categories ordered by priority.  */
 676 static enum coding_category coding_priorities[coding_category_max];
 677
 678 /* Nth element is a coding context for the coding system bound to the
 679    Nth coding category.  */
 680 static struct coding_system coding_categories[coding_category_max];
 681
 682 /*** Commonly used macros and functions ***/
 683
 684 #ifndef min
 685 #define min(a, b) ((a) < (b) ? (a) : (b))
 686 #endif
 687 #ifndef max
 688 #define max(a, b) ((a) > (b) ? (a) : (b))
 689 #endif
 690
 691 #define CODING_GET_INFO(coding, attrs, charset_list)    \
 692   do {                                                  \
 693     (attrs) = CODING_ID_ATTRS ((coding)->id);           \
 694     (charset_list) = CODING_ATTR_CHARSET_LIST (attrs);  \
 695   } while (0)
 696
 697
 698 /* Safely get one byte from the source text pointed by SRC which ends
 699    at SRC_END, and set C to that byte.  If there are not enough bytes
 700    in the source, it jumps to `no_more_source'.  If multibytep is
 701    nonzero, and a multibyte character is found at SRC, set C to the
 702    negative value of the character code.  The caller should declare
 703    and set these variables appropriately in advance:
 704         src, src_end, multibytep */
 705
 706 #define ONE_MORE_BYTE(c)                                \
 707   do {                                                  \
 708     if (src == src_end)                                 \
 709       {                                                 \
 710         if (src_base < src)                             \
 711           record_conversion_result                      \
 712             (coding, CODING_RESULT_INSUFFICIENT_SRC);   \
 713         goto no_more_source;                            \
 714       }                                                 \
 715     c = *src++;                                         \
 716     if (multibytep && (c & 0x80))                       \
 717       {                                                 \
 718         if ((c & 0xFE) == 0xC0)                         \
 719           c = ((c & 1) << 6) | *src++;                  \
 720         else                                            \
 721           {                                             \
 722             src--;                                      \
 723             c = - string_char (src, &src, NULL);        \
 724             record_conversion_result                    \
 725               (coding, CODING_RESULT_INVALID_SRC);      \
 726           }                                             \
 727       }                                                 \
 728     consumed_chars++;                                   \
 729   } while (0)
 730
 731
 732 #define ONE_MORE_BYTE_NO_CHECK(c)                       \
 733   do {                                                  \
 734     c = *src++;                                         \
 735     if (multibytep && (c & 0x80))                       \
 736       {                                                 \
 737         if ((c & 0xFE) == 0xC0)                         \
 738           c = ((c & 1) << 6) | *src++;                  \
 739         else                                            \
 740           {                                             \
 741             src--;                                      \
 742             c = - string_char (src, &src, NULL);        \
 743             record_conversion_result                    \
 744               (coding, CODING_RESULT_INVALID_SRC);      \
 745           }                                             \
 746       }                                                 \
 747     consumed_chars++;                                   \
 748   } while (0)
 749
 750
 751 /* Store a byte C in the place pointed by DST and increment DST to the
 752    next free point, and increment PRODUCED_CHARS.  The caller should
 753    assure that C is 0..127, and declare and set the variable `dst'
 754    appropriately in advance.
 755 */
 756
 757
 758 #define EMIT_ONE_ASCII_BYTE(c)  \
 759   do {                          \
 760     produced_chars++;           \
 761     *dst++ = (c);               \
 762   } while (0)
 763
 764
 765 /* Like EMIT_ONE_ASCII_BYTE byt store two bytes; C1 and C2.  */
 766
 767 #define EMIT_TWO_ASCII_BYTES(c1, c2)    \
 768   do {                                  \
 769     produced_chars += 2;                \
 770     *dst++ = (c1), *dst++ = (c2);       \
 771   } while (0)
 772
 773
 774 /* Store a byte C in the place pointed by DST and increment DST to the
 775    next free point, and increment PRODUCED_CHARS.  If MULTIBYTEP is
 776    nonzero, store in an appropriate multibyte from.  The caller should
 777    declare and set the variables `dst' and `multibytep' appropriately
 778    in advance.  */
 779
 780 #define EMIT_ONE_BYTE(c)                \
 781   do {                                  \
 782     produced_chars++;                   \
 783     if (multibytep)                     \
 784       {                                 \
 785         int ch = (c);                   \
 786         if (ch >= 0x80)                 \
 787           ch = BYTE8_TO_CHAR (ch);      \
 788         CHAR_STRING_ADVANCE (ch, dst);  \
 789       }                                 \
 790     else                                \
 791       *dst++ = (c);                     \
 792   } while (0)
 793
 794
 795 /* Like EMIT_ONE_BYTE, but emit two bytes; C1 and C2.  */
 796
 797 #define EMIT_TWO_BYTES(c1, c2)          \
 798   do {                                  \
 799     produced_chars += 2;                \
 800     if (multibytep)                     \
 801       {                                 \
 802         int ch;                         \
 803                                         \
 804         ch = (c1);                      \
 805         if (ch >= 0x80)                 \
 806           ch = BYTE8_TO_CHAR (ch);      \
 807         CHAR_STRING_ADVANCE (ch, dst);  \
 808         ch = (c2);                      \
 809         if (ch >= 0x80)                 \
 810           ch = BYTE8_TO_CHAR (ch);      \
 811         CHAR_STRING_ADVANCE (ch, dst);  \
 812       }                                 \
 813     else                                \
 814       {                                 \
 815         *dst++ = (c1);                  \
 816         *dst++ = (c2);                  \
 817       }                                 \
 818   } while (0)
 819
 820
 821 #define EMIT_THREE_BYTES(c1, c2, c3)    \
 822   do {                                  \
 823     EMIT_ONE_BYTE (c1);                 \
 824     EMIT_TWO_BYTES (c2, c3);            \
 825   } while (0)
 826
 827
 828 #define EMIT_FOUR_BYTES(c1, c2, c3, c4)         \
 829   do {                                          \
 830     EMIT_TWO_BYTES (c1, c2);                    \
 831     EMIT_TWO_BYTES (c3, c4);                    \
 832   } while (0)
 833
 834
 835 /* Prototypes for static functions.  */
 836 static void record_conversion_result P_ ((struct coding_system *coding,
 837                                           enum coding_result_code result));
 838 static int detect_coding_utf_8 P_ ((struct coding_system *,
 839                                     struct coding_detection_info *info));
 840 static void decode_coding_utf_8 P_ ((struct coding_system *));
 841 static int encode_coding_utf_8 P_ ((struct coding_system *));
 842
 843 static int detect_coding_utf_16 P_ ((struct coding_system *,
 844                                      struct coding_detection_info *info));
 845 static void decode_coding_utf_16 P_ ((struct coding_system *));
 846 static int encode_coding_utf_16 P_ ((struct coding_system *));
 847
 848 static int detect_coding_iso_2022 P_ ((struct coding_system *,
 849                                        struct coding_detection_info *info));
 850 static void decode_coding_iso_2022 P_ ((struct coding_system *));
 851 static int encode_coding_iso_2022 P_ ((struct coding_system *));
 852
 853 static int detect_coding_emacs_mule P_ ((struct coding_system *,
 854                                          struct coding_detection_info *info));
 855 static void decode_coding_emacs_mule P_ ((struct coding_system *));
 856 static int encode_coding_emacs_mule P_ ((struct coding_system *));
 857
 858 static int detect_coding_sjis P_ ((struct coding_system *,
 859                                    struct coding_detection_info *info));
 860 static void decode_coding_sjis P_ ((struct coding_system *));
 861 static int encode_coding_sjis P_ ((struct coding_system *));
 862
 863 static int detect_coding_big5 P_ ((struct coding_system *,
 864                                    struct coding_detection_info *info));
 865 static void decode_coding_big5 P_ ((struct coding_system *));
 866 static int encode_coding_big5 P_ ((struct coding_system *));
 867
 868 static int detect_coding_ccl P_ ((struct coding_system *,
 869                                   struct coding_detection_info *info));
 870 static void decode_coding_ccl P_ ((struct coding_system *));
 871 static int encode_coding_ccl P_ ((struct coding_system *));
 872
 873 static void decode_coding_raw_text P_ ((struct coding_system *));
 874 static int encode_coding_raw_text P_ ((struct coding_system *));
 875
 876 static void coding_set_source P_ ((struct coding_system *));
 877 static void coding_set_destination P_ ((struct coding_system *));
 878 static void coding_alloc_by_realloc P_ ((struct coding_system *, EMACS_INT));
 879 static void coding_alloc_by_making_gap P_ ((struct coding_system *,
 880                                             EMACS_INT));
 881 static unsigned char *alloc_destination P_ ((struct coding_system *,
 882                                              EMACS_INT, unsigned char *));
 883 static void setup_iso_safe_charsets P_ ((Lisp_Object));
 884 static unsigned char *encode_designation_at_bol P_ ((struct coding_system *,
 885                                                      int *, int *,
 886                                                      unsigned char *));
 887 static int detect_eol P_ ((const unsigned char *,
 888                            EMACS_INT, enum coding_category));
 889 static Lisp_Object adjust_coding_eol_type P_ ((struct coding_system *, int));
 890 static void decode_eol P_ ((struct coding_system *));
 891 static Lisp_Object get_translation_table P_ ((Lisp_Object, int, int *));
 892 static Lisp_Object get_translation P_ ((Lisp_Object, int *, int *,
 893                                         int, int *, int *));
 894 static int produce_chars P_ ((struct coding_system *, Lisp_Object, int));
 895 static INLINE void produce_composition P_ ((struct coding_system *, int *,
 896                                             EMACS_INT));
 897 static INLINE void produce_charset P_ ((struct coding_system *, int *,
 898                                         EMACS_INT));
 899 static void produce_annotation P_ ((struct coding_system *, EMACS_INT));
 900 static int decode_coding P_ ((struct coding_system *));
 901 static INLINE int *handle_composition_annotation P_ ((EMACS_INT, EMACS_INT,
 902                                                       struct coding_system *,
 903                                                       int *, EMACS_INT *));
 904 static INLINE int *handle_charset_annotation P_ ((EMACS_INT, EMACS_INT,
 905                                                   struct coding_system *,
 906                                                   int *, EMACS_INT *));
 907 static void consume_chars P_ ((struct coding_system *, Lisp_Object, int));
 908 static int encode_coding P_ ((struct coding_system *));
 909 static Lisp_Object make_conversion_work_buffer P_ ((int));
 910 static Lisp_Object code_conversion_restore P_ ((Lisp_Object));
 911 static INLINE int char_encodable_p P_ ((int, Lisp_Object));
 912 static Lisp_Object make_subsidiaries P_ ((Lisp_Object));
 913
 914 static void
 915 record_conversion_result (struct coding_system *coding,
 916                           enum coding_result_code result)
 917 {
 918   coding->result = result;
 919   switch (result)
 920     {
 921     case CODING_RESULT_INSUFFICIENT_SRC:
 922       Vlast_code_conversion_error = Qinsufficient_source;
 923       break;
 924     case CODING_RESULT_INCONSISTENT_EOL:
 925       Vlast_code_conversion_error = Qinconsistent_eol;
 926       break;
 927     case CODING_RESULT_INVALID_SRC:
 928       Vlast_code_conversion_error = Qinvalid_source;
 929       break;
 930     case CODING_RESULT_INTERRUPT:
 931       Vlast_code_conversion_error = Qinterrupted;
 932       break;
 933     case CODING_RESULT_INSUFFICIENT_MEM:
 934       Vlast_code_conversion_error = Qinsufficient_memory;
 935       break;
 936     default:
 937       Vlast_code_conversion_error = intern ("Unknown error");
 938     }
 939 }
 940
 941 #define CODING_DECODE_CHAR(coding, src, src_base, src_end, charset, code, c) \
 942   do {                                                                       \
 943     charset_map_loaded = 0;                                                  \
 944     c = DECODE_CHAR (charset, code);                                         \
 945     if (charset_map_loaded)                                                  \
 946       {                                                                      \
 947         const unsigned char *orig = coding->source;                          \
 948         EMACS_INT offset;                                                    \
 949                                                                              \
 950         coding_set_source (coding);                                          \
 951         offset = coding->source - orig;                                      \
 952         src += offset;                                                       \
 953         src_base += offset;                                                  \
 954         src_end += offset;                                                   \
 955       }                                                                      \
 956   } while (0)
 957
 958
 959 #define ASSURE_DESTINATION(bytes)                               \
 960   do {                                                          \
 961     if (dst + (bytes) >= dst_end)                               \
 962       {                                                         \
 963         int more_bytes = charbuf_end - charbuf + (bytes);       \
 964                                                                 \
 965         dst = alloc_destination (coding, more_bytes, dst);      \
 966         dst_end = coding->destination + coding->dst_bytes;      \
 967       }                                                         \
 968   } while (0)
 969
 970
 971
 972 static void
 973 coding_set_source (coding)
 974      struct coding_system *coding;
 975 {
 976   if (BUFFERP (coding->src_object))
 977     {
 978       struct buffer *buf = XBUFFER (coding->src_object);
 979
 980       if (coding->src_pos < 0)
 981         coding->source = BUF_GAP_END_ADDR (buf) + coding->src_pos_byte;
 982       else
 983         coding->source = BUF_BYTE_ADDRESS (buf, coding->src_pos_byte);
 984     }
 985   else if (STRINGP (coding->src_object))
 986     {
 987       coding->source = SDATA (coding->src_object) + coding->src_pos_byte;
 988     }
 989   else
 990     /* Otherwise, the source is C string and is never relocated
 991        automatically.  Thus we don't have to update anything.  */
 992     ;
 993 }
 994
 995 static void
 996 coding_set_destination (coding)
 997      struct coding_system *coding;
 998 {
 999   if (BUFFERP (coding->dst_object))
1000     {
1001       if (coding->src_pos < 0)
1002         {
1003           coding->destination = BEG_ADDR + coding->dst_pos_byte - 1;
1004           coding->dst_bytes = (GAP_END_ADDR
1005                                - (coding->src_bytes - coding->consumed)
1006                                - coding->destination);
1007         }
1008       else
1009         {
1010           /* We are sure that coding->dst_pos_byte is before the gap
1011              of the buffer. */
1012           coding->destination = (BUF_BEG_ADDR (XBUFFER (coding->dst_object))
1013                                  + coding->dst_pos_byte - 1);
1014           coding->dst_bytes = (BUF_GAP_END_ADDR (XBUFFER (coding->dst_object))
1015                                - coding->destination);
1016         }
1017     }
1018   else
1019     /* Otherwise, the destination is C string and is never relocated
1020        automatically.  Thus we don't have to update anything.  */
1021     ;
1022 }
1023
1024
1025 static void
1026 coding_alloc_by_realloc (coding, bytes)
1027      struct coding_system *coding;
1028      EMACS_INT bytes;
1029 {
1030   coding->destination = (unsigned char *) xrealloc (coding->destination,
1031                                                     coding->dst_bytes + bytes);
1032   coding->dst_bytes += bytes;
1033 }
1034
1035 static void
1036 coding_alloc_by_making_gap (coding, bytes)
1037      struct coding_system *coding;
1038      EMACS_INT bytes;
1039 {
1040   if (BUFFERP (coding->dst_object)
1041       && EQ (coding->src_object, coding->dst_object))
1042     {
1043       EMACS_INT add = coding->src_bytes - coding->consumed;
1044
1045       GAP_SIZE -= add; ZV += add; Z += add; ZV_BYTE += add; Z_BYTE += add;
1046       make_gap (bytes);
1047       GAP_SIZE += add; ZV -= add; Z -= add; ZV_BYTE -= add; Z_BYTE -= add;
1048     }
1049   else
1050     {
1051       Lisp_Object this_buffer;
1052
1053       this_buffer = Fcurrent_buffer ();
1054       set_buffer_internal (XBUFFER (coding->dst_object));
1055       make_gap (bytes);
1056       set_buffer_internal (XBUFFER (this_buffer));
1057     }
1058 }
1059
1060
1061 static unsigned char *
1062 alloc_destination (coding, nbytes, dst)
1063      struct coding_system *coding;
1064      EMACS_INT nbytes;
1065      unsigned char *dst;
1066 {
1067   EMACS_INT offset = dst - coding->destination;
1068
1069   if (BUFFERP (coding->dst_object))
1070     coding_alloc_by_making_gap (coding, nbytes);
1071   else
1072     coding_alloc_by_realloc (coding, nbytes);
1073   record_conversion_result (coding, CODING_RESULT_SUCCESS);
1074   coding_set_destination (coding);
1075   dst = coding->destination + offset;
1076   return dst;
1077 }
1078
1079 /** Macros for annotations.  */
1080
1081 /* Maximum length of annotation data (sum of annotations for
1082    composition and charset).  */
1083 #define MAX_ANNOTATION_LENGTH (4 + (MAX_COMPOSITION_COMPONENTS * 2) - 1 + 4)
1084
1085 /* An annotation data is stored in the array coding->charbuf in this
1086    format:
1087      [ -LENGTH ANNOTATION_MASK NCHARS ... ]
1088    LENGTH is the number of elements in the annotation.
1089    ANNOTATION_MASK is one of CODING_ANNOTATE_XXX_MASK.
1090    NCHARS is the number of characters in the text annotated.
1091
1092    The format of the following elements depend on ANNOTATION_MASK.
1093
1094    In the case of CODING_ANNOTATE_COMPOSITION_MASK, these elements
1095    follows:
1096      ... METHOD [ COMPOSITION-COMPONENTS ... ]
1097    METHOD is one of enum composition_method.
1098    Optionnal COMPOSITION-COMPONENTS are characters and composition
1099    rules.
1100
1101    In the case of CODING_ANNOTATE_CHARSET_MASK, one element CHARSET-ID
1102    follows.  */
1103
1104 #define ADD_ANNOTATION_DATA(buf, len, mask, nchars)     \
1105   do {                                                  \
1106     *(buf)++ = -(len);                                  \
1107     *(buf)++ = (mask);                                  \
1108     *(buf)++ = (nchars);                                \
1109     coding->annotated = 1;                              \
1110   } while (0);
1111
1112 #define ADD_COMPOSITION_DATA(buf, nchars, method)                           \
1113   do {                                                                      \
1114     ADD_ANNOTATION_DATA (buf, 4, CODING_ANNOTATE_COMPOSITION_MASK, nchars); \
1115     *buf++ = method;                                                        \
1116   } while (0)
1117
1118
1119 #define ADD_CHARSET_DATA(buf, nchars, id)                               \
1120   do {                                                                  \
1121     ADD_ANNOTATION_DATA (buf, 4, CODING_ANNOTATE_CHARSET_MASK, nchars); \
1122     *buf++ = id;                                                        \
1123   } while (0)
1124
1125 \f
1126 /*** 2. Emacs' internal format (emacs-utf-8) ***/
1127
1128
1129
1130 \f
1131 /*** 3. UTF-8 ***/
1132
1133 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
1134    Check if a text is encoded in UTF-8.  If it is, return 1, else
1135    return 0.  */
1136
1137 #define UTF_8_1_OCTET_P(c)         ((c) < 0x80)
1138 #define UTF_8_EXTRA_OCTET_P(c)     (((c) & 0xC0) == 0x80)
1139 #define UTF_8_2_OCTET_LEADING_P(c) (((c) & 0xE0) == 0xC0)
1140 #define UTF_8_3_OCTET_LEADING_P(c) (((c) & 0xF0) == 0xE0)
1141 #define UTF_8_4_OCTET_LEADING_P(c) (((c) & 0xF8) == 0xF0)
1142 #define UTF_8_5_OCTET_LEADING_P(c) (((c) & 0xFC) == 0xF8)
1143
1144 static int
1145 detect_coding_utf_8 (coding, detect_info)
1146      struct coding_system *coding;
1147      struct coding_detection_info *detect_info;
1148 {
1149   const unsigned char *src = coding->source, *src_base;
1150   const unsigned char *src_end = coding->source + coding->src_bytes;
1151   int multibytep = coding->src_multibyte;
1152   int consumed_chars = 0;
1153   int found = 0;
1154
1155   detect_info->checked |= CATEGORY_MASK_UTF_8;
1156   /* A coding system of this category is always ASCII compatible.  */
1157   src += coding->head_ascii;
1158
1159   while (1)
1160     {
1161       int c, c1, c2, c3, c4;
1162
1163       src_base = src;
1164       ONE_MORE_BYTE (c);
1165       if (c < 0 || UTF_8_1_OCTET_P (c))
1166         continue;
1167       ONE_MORE_BYTE (c1);
1168       if (c1 < 0 || ! UTF_8_EXTRA_OCTET_P (c1))
1169         break;
1170       if (UTF_8_2_OCTET_LEADING_P (c))
1171         {
1172           found = CATEGORY_MASK_UTF_8;
1173           continue;
1174         }
1175       ONE_MORE_BYTE (c2);
1176       if (c2 < 0 || ! UTF_8_EXTRA_OCTET_P (c2))
1177         break;
1178       if (UTF_8_3_OCTET_LEADING_P (c))
1179         {
1180           found = CATEGORY_MASK_UTF_8;
1181           continue;
1182         }
1183       ONE_MORE_BYTE (c3);
1184       if (c3 < 0 || ! UTF_8_EXTRA_OCTET_P (c3))
1185         break;
1186       if (UTF_8_4_OCTET_LEADING_P (c))
1187         {
1188           found = CATEGORY_MASK_UTF_8;
1189           continue;
1190         }
1191       ONE_MORE_BYTE (c4);
1192       if (c4 < 0 || ! UTF_8_EXTRA_OCTET_P (c4))
1193         break;
1194       if (UTF_8_5_OCTET_LEADING_P (c))
1195         {
1196           found = CATEGORY_MASK_UTF_8;
1197           continue;
1198         }
1199       break;
1200     }
1201   detect_info->rejected |= CATEGORY_MASK_UTF_8;
1202   return 0;
1203
1204  no_more_source:
1205   if (src_base < src && coding->mode & CODING_MODE_LAST_BLOCK)
1206     {
1207       detect_info->rejected |= CATEGORY_MASK_UTF_8;
1208       return 0;
1209     }
1210   detect_info->found |= found;
1211   return 1;
1212 }
1213
1214
1215 static void
1216 decode_coding_utf_8 (coding)
1217      struct coding_system *coding;
1218 {
1219   const unsigned char *src = coding->source + coding->consumed;
1220   const unsigned char *src_end = coding->source + coding->src_bytes;
1221   const unsigned char *src_base;
1222   int *charbuf = coding->charbuf + coding->charbuf_used;
1223   int *charbuf_end = coding->charbuf + coding->charbuf_size;
1224   int consumed_chars = 0, consumed_chars_base;
1225   int multibytep = coding->src_multibyte;
1226   Lisp_Object attr, charset_list;
1227
1228   CODING_GET_INFO (coding, attr, charset_list);
1229
1230   while (1)
1231     {
1232       int c, c1, c2, c3, c4, c5;
1233
1234       src_base = src;
1235       consumed_chars_base = consumed_chars;
1236
1237       if (charbuf >= charbuf_end)
1238         break;
1239
1240       ONE_MORE_BYTE (c1);
1241       if (c1 < 0)
1242         {
1243           c = - c1;
1244         }
1245       else if (UTF_8_1_OCTET_P(c1))
1246         {
1247           c = c1;
1248         }
1249       else
1250         {
1251           ONE_MORE_BYTE (c2);
1252           if (c2 < 0 || ! UTF_8_EXTRA_OCTET_P (c2))
1253             goto invalid_code;
1254           if (UTF_8_2_OCTET_LEADING_P (c1))
1255             {
1256               c = ((c1 & 0x1F) << 6) | (c2 & 0x3F);
1257               /* Reject overlong sequences here and below.  Encoders
1258                  producing them are incorrect, they can be misleading,
1259                  and they mess up read/write invariance.  */
1260               if (c < 128)
1261                 goto invalid_code;
1262             }
1263           else
1264             {
1265               ONE_MORE_BYTE (c3);
1266               if (c3 < 0 || ! UTF_8_EXTRA_OCTET_P (c3))
1267                 goto invalid_code;
1268               if (UTF_8_3_OCTET_LEADING_P (c1))
1269                 {
1270                   c = (((c1 & 0xF) << 12)
1271                        | ((c2 & 0x3F) << 6) | (c3 & 0x3F));
1272                   if (c < 0x800
1273                       || (c >= 0xd800 && c < 0xe000)) /* surrogates (invalid) */
1274                     goto invalid_code;
1275                 }
1276               else
1277                 {
1278                   ONE_MORE_BYTE (c4);
1279                   if (c4 < 0 || ! UTF_8_EXTRA_OCTET_P (c4))
1280                     goto invalid_code;
1281                   if (UTF_8_4_OCTET_LEADING_P (c1))
1282                     {
1283                     c = (((c1 & 0x7) << 18) | ((c2 & 0x3F) << 12)
1284                          | ((c3 & 0x3F) << 6) | (c4 & 0x3F));
1285                     if (c < 0x10000)
1286                       goto invalid_code;
1287                     }
1288                   else
1289                     {
1290                       ONE_MORE_BYTE (c5);
1291                       if (c5 < 0 || ! UTF_8_EXTRA_OCTET_P (c5))
1292                         goto invalid_code;
1293                       if (UTF_8_5_OCTET_LEADING_P (c1))
1294                         {
1295                           c = (((c1 & 0x3) << 24) | ((c2 & 0x3F) << 18)
1296                                | ((c3 & 0x3F) << 12) | ((c4 & 0x3F) << 6)
1297                                | (c5 & 0x3F));
1298                           if ((c > MAX_CHAR) || (c < 0x200000))
1299                             goto invalid_code;
1300                         }
1301                       else
1302                         goto invalid_code;
1303                     }
1304                 }
1305             }
1306         }
1307
1308       *charbuf++ = c;
1309       continue;
1310
1311     invalid_code:
1312       src = src_base;
1313       consumed_chars = consumed_chars_base;
1314       ONE_MORE_BYTE (c);
1315       *charbuf++ = ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
1316       coding->errors++;
1317     }
1318
1319  no_more_source:
1320   coding->consumed_char += consumed_chars_base;
1321   coding->consumed = src_base - coding->source;
1322   coding->charbuf_used = charbuf - coding->charbuf;
1323 }
1324
1325
1326 static int
1327 encode_coding_utf_8 (coding)
1328      struct coding_system *coding;
1329 {
1330   int multibytep = coding->dst_multibyte;
1331   int *charbuf = coding->charbuf;
1332   int *charbuf_end = charbuf + coding->charbuf_used;
1333   unsigned char *dst = coding->destination + coding->produced;
1334   unsigned char *dst_end = coding->destination + coding->dst_bytes;
1335   int produced_chars = 0;
1336   int c;
1337
1338   if (multibytep)
1339     {
1340       int safe_room = MAX_MULTIBYTE_LENGTH * 2;
1341
1342       while (charbuf < charbuf_end)
1343         {
1344           unsigned char str[MAX_MULTIBYTE_LENGTH], *p, *pend = str;
1345
1346           ASSURE_DESTINATION (safe_room);
1347           c = *charbuf++;
1348           if (CHAR_BYTE8_P (c))
1349             {
1350               c = CHAR_TO_BYTE8 (c);
1351               EMIT_ONE_BYTE (c);
1352             }
1353           else
1354             {
1355               CHAR_STRING_ADVANCE (c, pend);
1356               for (p = str; p < pend; p++)
1357                 EMIT_ONE_BYTE (*p);
1358             }
1359         }
1360     }
1361   else
1362     {
1363       int safe_room = MAX_MULTIBYTE_LENGTH;
1364
1365       while (charbuf < charbuf_end)
1366         {
1367           ASSURE_DESTINATION (safe_room);
1368           c = *charbuf++;
1369           if (CHAR_BYTE8_P (c))
1370             *dst++ = CHAR_TO_BYTE8 (c);
1371           else
1372             dst += CHAR_STRING (c, dst);
1373           produced_chars++;
1374         }
1375     }
1376   record_conversion_result (coding, CODING_RESULT_SUCCESS);
1377   coding->produced_char += produced_chars;
1378   coding->produced = dst - coding->destination;
1379   return 0;
1380 }
1381
1382
1383 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
1384    Check if a text is encoded in one of UTF-16 based coding systems.
1385    If it is, return 1, else return 0.  */
1386
1387 #define UTF_16_HIGH_SURROGATE_P(val) \
1388   (((val) & 0xFC00) == 0xD800)
1389
1390 #define UTF_16_LOW_SURROGATE_P(val) \
1391   (((val) & 0xFC00) == 0xDC00)
1392
1393 #define UTF_16_INVALID_P(val)   \
1394   (((val) == 0xFFFE)            \
1395    || ((val) == 0xFFFF)         \
1396    || UTF_16_LOW_SURROGATE_P (val))
1397
1398
1399 static int
1400 detect_coding_utf_16 (coding, detect_info)
1401      struct coding_system *coding;
1402      struct coding_detection_info *detect_info;
1403 {
1404   const unsigned char *src = coding->source, *src_base = src;
1405   const unsigned char *src_end = coding->source + coding->src_bytes;
1406   int multibytep = coding->src_multibyte;
1407   int consumed_chars = 0;
1408   int c1, c2;
1409
1410   detect_info->checked |= CATEGORY_MASK_UTF_16;
1411   if (coding->mode & CODING_MODE_LAST_BLOCK
1412       && (coding->src_chars & 1))
1413     {
1414       detect_info->rejected |= CATEGORY_MASK_UTF_16;
1415       return 0;
1416     }
1417
1418   ONE_MORE_BYTE (c1);
1419   ONE_MORE_BYTE (c2);
1420   if ((c1 == 0xFF) && (c2 == 0xFE))
1421     {
1422       detect_info->found |= (CATEGORY_MASK_UTF_16_LE
1423                              | CATEGORY_MASK_UTF_16_AUTO);
1424       detect_info->rejected |= (CATEGORY_MASK_UTF_16_BE
1425                                 | CATEGORY_MASK_UTF_16_BE_NOSIG
1426                                 | CATEGORY_MASK_UTF_16_LE_NOSIG);
1427     }
1428   else if ((c1 == 0xFE) && (c2 == 0xFF))
1429     {
1430       detect_info->found |= (CATEGORY_MASK_UTF_16_BE
1431                              | CATEGORY_MASK_UTF_16_AUTO);
1432       detect_info->rejected |= (CATEGORY_MASK_UTF_16_LE
1433                                 | CATEGORY_MASK_UTF_16_BE_NOSIG
1434                                 | CATEGORY_MASK_UTF_16_LE_NOSIG);
1435     }
1436   else if (c1 >= 0 && c2 >= 0)
1437     {
1438       detect_info->rejected
1439         |= (CATEGORY_MASK_UTF_16_BE | CATEGORY_MASK_UTF_16_LE);
1440     }
1441  no_more_source:
1442   return 1;
1443 }
1444
1445 static void
1446 decode_coding_utf_16 (coding)
1447      struct coding_system *coding;
1448 {
1449   const unsigned char *src = coding->source + coding->consumed;
1450   const unsigned char *src_end = coding->source + coding->src_bytes;
1451   const unsigned char *src_base;
1452   int *charbuf = coding->charbuf + coding->charbuf_used;
1453   int *charbuf_end = coding->charbuf + coding->charbuf_size;
1454   int consumed_chars = 0, consumed_chars_base;
1455   int multibytep = coding->src_multibyte;
1456   enum utf_16_bom_type bom = CODING_UTF_16_BOM (coding);
1457   enum utf_16_endian_type endian = CODING_UTF_16_ENDIAN (coding);
1458   int surrogate = CODING_UTF_16_SURROGATE (coding);
1459   Lisp_Object attr, charset_list;
1460
1461   CODING_GET_INFO (coding, attr, charset_list);
1462
1463   if (bom == utf_16_with_bom)
1464     {
1465       int c, c1, c2;
1466
1467       src_base = src;
1468       ONE_MORE_BYTE (c1);
1469       ONE_MORE_BYTE (c2);
1470       c = (c1 << 8) | c2;
1471
1472       if (endian == utf_16_big_endian
1473           ? c != 0xFEFF : c != 0xFFFE)
1474         {
1475           /* The first two bytes are not BOM.  Treat them as bytes
1476              for a normal character.  */
1477           src = src_base;
1478           coding->errors++;
1479         }
1480       CODING_UTF_16_BOM (coding) = utf_16_without_bom;
1481     }
1482   else if (bom == utf_16_detect_bom)
1483     {
1484       /* We have already tried to detect BOM and failed in
1485          detect_coding.  */
1486       CODING_UTF_16_BOM (coding) = utf_16_without_bom;
1487     }
1488
1489   while (1)
1490     {
1491       int c, c1, c2;
1492
1493       src_base = src;
1494       consumed_chars_base = consumed_chars;
1495
1496       if (charbuf + 2 >= charbuf_end)
1497         break;
1498
1499       ONE_MORE_BYTE (c1);
1500       if (c1 < 0)
1501         {
1502           *charbuf++ = -c1;
1503           continue;
1504         }
1505       ONE_MORE_BYTE (c2);
1506       if (c2 < 0)
1507         {
1508           *charbuf++ = ASCII_BYTE_P (c1) ? c1 : BYTE8_TO_CHAR (c1);
1509           *charbuf++ = -c2;
1510           continue;
1511         }
1512       c = (endian == utf_16_big_endian
1513            ? ((c1 << 8) | c2) : ((c2 << 8) | c1));
1514       if (surrogate)
1515         {
1516           if (! UTF_16_LOW_SURROGATE_P (c))
1517             {
1518               if (endian == utf_16_big_endian)
1519                 c1 = surrogate >> 8, c2 = surrogate & 0xFF;
1520               else
1521                 c1 = surrogate & 0xFF, c2 = surrogate >> 8;
1522               *charbuf++ = c1;
1523               *charbuf++ = c2;
1524               coding->errors++;
1525               if (UTF_16_HIGH_SURROGATE_P (c))
1526                 CODING_UTF_16_SURROGATE (coding) = surrogate = c;
1527               else
1528                 *charbuf++ = c;
1529             }
1530           else
1531             {
1532               c = ((surrogate - 0xD800) << 10) | (c - 0xDC00);
1533               CODING_UTF_16_SURROGATE (coding) = surrogate = 0;
1534               *charbuf++ = 0x10000 + c;
1535             }
1536         }
1537       else
1538         {
1539           if (UTF_16_HIGH_SURROGATE_P (c))
1540             CODING_UTF_16_SURROGATE (coding) = surrogate = c;
1541           else
1542             *charbuf++ = c;
1543         }
1544     }
1545
1546  no_more_source:
1547   coding->consumed_char += consumed_chars_base;
1548   coding->consumed = src_base - coding->source;
1549   coding->charbuf_used = charbuf - coding->charbuf;
1550 }
1551
1552 static int
1553 encode_coding_utf_16 (coding)
1554      struct coding_system *coding;
1555 {
1556   int multibytep = coding->dst_multibyte;
1557   int *charbuf = coding->charbuf;
1558   int *charbuf_end = charbuf + coding->charbuf_used;
1559   unsigned char *dst = coding->destination + coding->produced;
1560   unsigned char *dst_end = coding->destination + coding->dst_bytes;
1561   int safe_room = 8;
1562   enum utf_16_bom_type bom = CODING_UTF_16_BOM (coding);
1563   int big_endian = CODING_UTF_16_ENDIAN (coding) == utf_16_big_endian;
1564   int produced_chars = 0;
1565   Lisp_Object attrs, charset_list;
1566   int c;
1567
1568   CODING_GET_INFO (coding, attrs, charset_list);
1569
1570   if (bom != utf_16_without_bom)
1571     {
1572       ASSURE_DESTINATION (safe_room);
1573       if (big_endian)
1574         EMIT_TWO_BYTES (0xFE, 0xFF);
1575       else
1576         EMIT_TWO_BYTES (0xFF, 0xFE);
1577       CODING_UTF_16_BOM (coding) = utf_16_without_bom;
1578     }
1579
1580   while (charbuf < charbuf_end)
1581     {
1582       ASSURE_DESTINATION (safe_room);
1583       c = *charbuf++;
1584       if (c >= MAX_UNICODE_CHAR)
1585         c = coding->default_char;
1586
1587       if (c < 0x10000)
1588         {
1589           if (big_endian)
1590             EMIT_TWO_BYTES (c >> 8, c & 0xFF);
1591           else
1592             EMIT_TWO_BYTES (c & 0xFF, c >> 8);
1593         }
1594       else
1595         {
1596           int c1, c2;
1597
1598           c -= 0x10000;
1599           c1 = (c >> 10) + 0xD800;
1600           c2 = (c & 0x3FF) + 0xDC00;
1601           if (big_endian)
1602             EMIT_FOUR_BYTES (c1 >> 8, c1 & 0xFF, c2 >> 8, c2 & 0xFF);
1603           else
1604             EMIT_FOUR_BYTES (c1 & 0xFF, c1 >> 8, c2 & 0xFF, c2 >> 8);
1605         }
1606     }
1607   record_conversion_result (coding, CODING_RESULT_SUCCESS);
1608   coding->produced = dst - coding->destination;
1609   coding->produced_char += produced_chars;
1610   return 0;
1611 }
1612
1613 \f
1614 /*** 6. Old Emacs' internal format (emacs-mule) ***/
1615
1616 /* Emacs' internal format for representation of multiple character
1617    sets is a kind of multi-byte encoding, i.e. characters are
1618    represented by variable-length sequences of one-byte codes.
1619
1620    ASCII characters and control characters (e.g. `tab', `newline') are
1621    represented by one-byte sequences which are their ASCII codes, in
1622    the range 0x00 through 0x7F.
1623
1624    8-bit characters of the range 0x80..0x9F are represented by
1625    two-byte sequences of LEADING_CODE_8_BIT_CONTROL and (their 8-bit
1626    code + 0x20).
1627
1628    8-bit characters of the range 0xA0..0xFF are represented by
1629    one-byte sequences which are their 8-bit code.
1630
1631    The other characters are represented by a sequence of `base
1632    leading-code', optional `extended leading-code', and one or two
1633    `position-code's.  The length of the sequence is determined by the
1634    base leading-code.  Leading-code takes the range 0x81 through 0x9D,
1635    whereas extended leading-code and position-code take the range 0xA0
1636    through 0xFF.  See `charset.h' for more details about leading-code
1637    and position-code.
1638
1639    --- CODE RANGE of Emacs' internal format ---
1640    character set        range
1641    -------------        -----
1642    ascii                0x00..0x7F
1643    eight-bit-control    LEADING_CODE_8_BIT_CONTROL + 0xA0..0xBF
1644    eight-bit-graphic    0xA0..0xBF
1645    ELSE                 0x81..0x9D + [0xA0..0xFF]+
1646    ---------------------------------------------
1647
1648    As this is the internal character representation, the format is
1649    usually not used externally (i.e. in a file or in a data sent to a
1650    process).  But, it is possible to have a text externally in this
1651    format (i.e. by encoding by the coding system `emacs-mule').
1652
1653    In that case, a sequence of one-byte codes has a slightly different
1654    form.
1655
1656    At first, all characters in eight-bit-control are represented by
1657    one-byte sequences which are their 8-bit code.
1658
1659    Next, character composition data are represented by the byte
1660    sequence of the form: 0x80 METHOD BYTES CHARS COMPONENT ...,
1661    where,
1662         METHOD is 0xF0 plus one of composition method (enum
1663         composition_method),
1664
1665         BYTES is 0xA0 plus a byte length of this composition data,
1666
1667         CHARS is 0x20 plus a number of characters composed by this
1668         data,
1669
1670         COMPONENTs are characters of multibye form or composition
1671         rules encoded by two-byte of ASCII codes.
1672
1673    In addition, for backward compatibility, the following formats are
1674    also recognized as composition data on decoding.
1675
1676    0x80 MSEQ ...
1677    0x80 0xFF MSEQ RULE MSEQ RULE ... MSEQ
1678
1679    Here,
1680         MSEQ is a multibyte form but in these special format:
1681           ASCII: 0xA0 ASCII_CODE+0x80,
1682           other: LEADING_CODE+0x20 FOLLOWING-BYTE ...,
1683         RULE is a one byte code of the range 0xA0..0xF0 that
1684         represents a composition rule.
1685   */
1686
1687 char emacs_mule_bytes[256];
1688
1689 int
1690 emacs_mule_char (coding, src, nbytes, nchars, id)
1691      struct coding_system *coding;
1692      const unsigned char *src;
1693      int *nbytes, *nchars, *id;
1694 {
1695   const unsigned char *src_end = coding->source + coding->src_bytes;
1696   const unsigned char *src_base = src;
1697   int multibytep = coding->src_multibyte;
1698   struct charset *charset;
1699   unsigned code;
1700   int c;
1701   int consumed_chars = 0;
1702
1703   ONE_MORE_BYTE (c);
1704   if (c < 0)
1705     {
1706       c = -c;
1707       charset = emacs_mule_charset[0];
1708     }
1709   else
1710     {
1711       if (c >= 0xA0)
1712         {
1713           /* Old style component character of a compostion.  */
1714           if (c == 0xA0)
1715             {
1716               ONE_MORE_BYTE (c);
1717               c -= 0x80;
1718             }
1719           else
1720             c -= 0x20;
1721         }
1722
1723       switch (emacs_mule_bytes[c])
1724         {
1725         case 2:
1726           if (! (charset = emacs_mule_charset[c]))
1727             goto invalid_code;
1728           ONE_MORE_BYTE (c);
1729           if (c < 0xA0)
1730             goto invalid_code;
1731           code = c & 0x7F;
1732           break;
1733
1734         case 3:
1735           if (c == EMACS_MULE_LEADING_CODE_PRIVATE_11
1736               || c == EMACS_MULE_LEADING_CODE_PRIVATE_12)
1737             {
1738               ONE_MORE_BYTE (c);
1739               if (c < 0xA0 || ! (charset = emacs_mule_charset[c]))
1740                 goto invalid_code;
1741               ONE_MORE_BYTE (c);
1742               if (c < 0xA0)
1743                 goto invalid_code;
1744               code = c & 0x7F;
1745             }
1746           else
1747             {
1748               if (! (charset = emacs_mule_charset[c]))
1749                 goto invalid_code;
1750               ONE_MORE_BYTE (c);
1751               if (c < 0xA0)
1752                 goto invalid_code;
1753               code = (c & 0x7F) << 8;
1754               ONE_MORE_BYTE (c);
1755               if (c < 0xA0)
1756                 goto invalid_code;
1757               code |= c & 0x7F;
1758             }
1759           break;
1760
1761         case 4:
1762           ONE_MORE_BYTE (c);
1763           if (c < 0 || ! (charset = emacs_mule_charset[c]))
1764             goto invalid_code;
1765           ONE_MORE_BYTE (c);
1766           if (c < 0xA0)
1767             goto invalid_code;
1768           code = (c & 0x7F) << 8;
1769           ONE_MORE_BYTE (c);
1770           if (c < 0xA0)
1771             goto invalid_code;
1772           code |= c & 0x7F;
1773           break;
1774
1775         case 1:
1776           code = c;
1777           charset = CHARSET_FROM_ID (ASCII_BYTE_P (code)
1778                                      ? charset_ascii : charset_eight_bit);
1779           break;
1780
1781         default:
1782           abort ();
1783         }
1784       c = DECODE_CHAR (charset, code);
1785       if (c < 0)
1786         goto invalid_code;
1787     }
1788   *nbytes = src - src_base;
1789   *nchars = consumed_chars;
1790   if (id)
1791     *id = charset->id;
1792   return c;
1793
1794  no_more_source:
1795   return -2;
1796
1797  invalid_code:
1798   return -1;
1799 }
1800
1801
1802 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
1803    Check if a text is encoded in `emacs-mule'.  If it is, return 1,
1804    else return 0.  */
1805
1806 static int
1807 detect_coding_emacs_mule (coding, detect_info)
1808      struct coding_system *coding;
1809      struct coding_detection_info *detect_info;
1810 {
1811   const unsigned char *src = coding->source, *src_base;
1812   const unsigned char *src_end = coding->source + coding->src_bytes;
1813   int multibytep = coding->src_multibyte;
1814   int consumed_chars = 0;
1815   int c;
1816   int found = 0;
1817
1818   detect_info->checked |= CATEGORY_MASK_EMACS_MULE;
1819   /* A coding system of this category is always ASCII compatible.  */
1820   src += coding->head_ascii;
1821
1822   while (1)
1823     {
1824       src_base = src;
1825       ONE_MORE_BYTE (c);
1826       if (c < 0)
1827         continue;
1828       if (c == 0x80)
1829         {
1830           /* Perhaps the start of composite character.  We simple skip
1831              it because analyzing it is too heavy for detecting.  But,
1832              at least, we check that the composite character
1833              constitues of more than 4 bytes.  */
1834           const unsigned char *src_base;
1835
1836         repeat:
1837           src_base = src;
1838           do
1839             {
1840               ONE_MORE_BYTE (c);
1841             }
1842           while (c >= 0xA0);
1843
1844           if (src - src_base <= 4)
1845             break;
1846           found = CATEGORY_MASK_EMACS_MULE;
1847           if (c == 0x80)
1848             goto repeat;
1849         }
1850
1851       if (c < 0x80)
1852         {
1853           if (c < 0x20
1854               && (c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO))
1855             break;
1856         }
1857       else
1858         {
1859           int more_bytes = emacs_mule_bytes[*src_base] - 1;
1860
1861           while (more_bytes > 0)
1862             {
1863               ONE_MORE_BYTE (c);
1864               if (c < 0xA0)
1865                 {
1866                   src--;        /* Unread the last byte.  */
1867                   break;
1868                 }
1869               more_bytes--;
1870             }
1871           if (more_bytes != 0)
1872             break;
1873           found = CATEGORY_MASK_EMACS_MULE;
1874         }
1875     }
1876   detect_info->rejected |= CATEGORY_MASK_EMACS_MULE;
1877   return 0;
1878
1879  no_more_source:
1880   if (src_base < src && coding->mode & CODING_MODE_LAST_BLOCK)
1881     {
1882       detect_info->rejected |= CATEGORY_MASK_EMACS_MULE;
1883       return 0;
1884     }
1885   detect_info->found |= found;
1886   return 1;
1887 }
1888
1889
1890 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".  */
1891
1892 /* Decode a character represented as a component of composition
1893    sequence of Emacs 20/21 style at SRC.  Set C to that character and
1894    update SRC to the head of next character (or an encoded composition
1895    rule).  If SRC doesn't points a composition component, set C to -1.
1896    If SRC points an invalid byte sequence, global exit by a return
1897    value 0.  */
1898
1899 #define DECODE_EMACS_MULE_COMPOSITION_CHAR(buf)                 \
1900   if (1)                                                        \
1901     {                                                           \
1902       int c;                                                    \
1903       int nbytes, nchars;                                       \
1904                                                                 \
1905       if (src == src_end)                                       \
1906         break;                                                  \
1907       c = emacs_mule_char (coding, src, &nbytes, &nchars, NULL);\
1908       if (c < 0)                                                \
1909         {                                                       \
1910           if (c == -2)                                          \
1911             break;                                              \
1912           goto invalid_code;                                    \
1913         }                                                       \
1914       *buf++ = c;                                               \
1915       src += nbytes;                                            \
1916       consumed_chars += nchars;                                 \
1917     }                                                           \
1918   else
1919
1920
1921 /* Decode a composition rule represented as a component of composition
1922    sequence of Emacs 20 style at SRC.  Store the decoded rule in *BUF,
1923    and increment BUF.  If SRC points an invalid byte sequence, set C
1924    to -1.  */
1925
1926 #define DECODE_EMACS_MULE_COMPOSITION_RULE_20(buf)      \
1927   do {                                                  \
1928     int c, gref, nref;                                  \
1929                                                         \
1930     if (src >= src_end)                                 \
1931       goto invalid_code;                                \
1932     ONE_MORE_BYTE_NO_CHECK (c);                         \
1933     c -= 0xA0;                                          \
1934     if (c < 0 || c >= 81)                               \
1935       goto invalid_code;                                \
1936                                                         \
1937     gref = c / 9, nref = c % 9;                         \
1938     *buf++ = COMPOSITION_ENCODE_RULE (gref, nref);      \
1939   } while (0)
1940
1941
1942 /* Decode a composition rule represented as a component of composition
1943    sequence of Emacs 21 style at SRC.  Store the decoded rule in *BUF,
1944    and increment BUF.  If SRC points an invalid byte sequence, set C
1945    to -1.  */
1946
1947 #define DECODE_EMACS_MULE_COMPOSITION_RULE_21(buf)      \
1948   do {                                                  \
1949     int gref, nref;                                     \
1950                                                         \
1951     if (src + 1>= src_end)                              \
1952       goto invalid_code;                                \
1953     ONE_MORE_BYTE_NO_CHECK (gref);                      \
1954     gref -= 0x20;                                       \
1955     ONE_MORE_BYTE_NO_CHECK (nref);                      \
1956     nref -= 0x20;                                       \
1957     if (gref < 0 || gref >= 81                          \
1958         || nref < 0 || nref >= 81)                      \
1959       goto invalid_code;                                \
1960     *buf++ = COMPOSITION_ENCODE_RULE (gref, nref);      \
1961   } while (0)
1962
1963
1964 #define DECODE_EMACS_MULE_21_COMPOSITION(c)                             \
1965   do {                                                                  \
1966     /* Emacs 21 style format.  The first three bytes at SRC are         \
1967        (METHOD - 0xF2), (BYTES - 0xA0), (CHARS - 0xA0), where BYTES is  \
1968        the byte length of this composition information, CHARS is the    \
1969        number of characters composed by this composition.  */           \
1970     enum composition_method method = c - 0xF2;                          \
1971     int *charbuf_base = charbuf;                                        \
1972     int consumed_chars_limit;                                           \
1973     int nbytes, nchars;                                                 \
1974                                                                         \
1975     ONE_MORE_BYTE (c);                                                  \
1976     if (c < 0)                                                          \
1977       goto invalid_code;                                                \
1978     nbytes = c - 0xA0;                                                  \
1979     if (nbytes < 3)                                                     \
1980       goto invalid_code;                                                \
1981     ONE_MORE_BYTE (c);                                                  \
1982     if (c < 0)                                                          \
1983       goto invalid_code;                                                \
1984     nchars = c - 0xA0;                                                  \
1985     ADD_COMPOSITION_DATA (charbuf, nchars, method);                     \
1986     consumed_chars_limit = consumed_chars_base + nbytes;                \
1987     if (method != COMPOSITION_RELATIVE)                                 \
1988       {                                                                 \
1989         int i = 0;                                                      \
1990         while (consumed_chars < consumed_chars_limit)                   \
1991           {                                                             \
1992             if (i % 2 && method != COMPOSITION_WITH_ALTCHARS)           \
1993               DECODE_EMACS_MULE_COMPOSITION_RULE_21 (charbuf);          \
1994             else                                                        \
1995               DECODE_EMACS_MULE_COMPOSITION_CHAR (charbuf);             \
1996             i++;                                                        \
1997           }                                                             \
1998         if (consumed_chars < consumed_chars_limit)                      \
1999           goto invalid_code;                                            \
2000         charbuf_base[0] -= i;                                           \
2001       }                                                                 \
2002   } while (0)
2003
2004
2005 #define DECODE_EMACS_MULE_20_RELATIVE_COMPOSITION(c)                    \
2006   do {                                                                  \
2007     /* Emacs 20 style format for relative composition.  */              \
2008     /* Store multibyte form of characters to be composed.  */           \
2009     enum composition_method method = COMPOSITION_RELATIVE;              \
2010     int components[MAX_COMPOSITION_COMPONENTS * 2 - 1];                 \
2011     int *buf = components;                                              \
2012     int i, j;                                                           \
2013                                                                         \
2014     src = src_base;                                                     \
2015     ONE_MORE_BYTE (c);          /* skip 0x80 */                         \
2016     for (i = 0; *src >= 0xA0 && i < MAX_COMPOSITION_COMPONENTS; i++)    \
2017       DECODE_EMACS_MULE_COMPOSITION_CHAR (buf);                         \
2018     if (i < 2)                                                          \
2019       goto invalid_code;                                                \
2020     ADD_COMPOSITION_DATA (charbuf, i, method);                          \
2021     for (j = 0; j < i; j++)                                             \
2022       *charbuf++ = components[j];                                       \
2023   } while (0)
2024
2025
2026 #define DECODE_EMACS_MULE_20_RULEBASE_COMPOSITION(c)            \
2027   do {                                                          \
2028     /* Emacs 20 style format for rule-base composition.  */     \
2029     /* Store multibyte form of characters to be composed.  */   \
2030     enum composition_method method = COMPOSITION_WITH_RULE;     \
2031     int *charbuf_base = charbuf;                                \
2032     int components[MAX_COMPOSITION_COMPONENTS * 2 - 1];         \
2033     int *buf = components;                                      \
2034     int i, j;                                                   \
2035                                                                 \
2036     DECODE_EMACS_MULE_COMPOSITION_CHAR (buf);                   \
2037     for (i = 1; i < MAX_COMPOSITION_COMPONENTS; i++)            \
2038       {                                                         \
2039         if (*src < 0xA0)                                        \
2040           break;                                                \
2041         DECODE_EMACS_MULE_COMPOSITION_RULE_20 (buf);            \
2042         DECODE_EMACS_MULE_COMPOSITION_CHAR (buf);               \
2043       }                                                         \
2044     if (i <= 1 || (buf - components) % 2 == 0)                  \
2045       goto invalid_code;                                        \
2046     if (charbuf + i + (i / 2) + 1 >= charbuf_end)               \
2047       goto no_more_source;                                      \
2048     ADD_COMPOSITION_DATA (charbuf, i, method);                  \
2049     i = i * 2 - 1;                                              \
2050     for (j = 0; j < i; j++)                                     \
2051       *charbuf++ = components[j];                               \
2052     charbuf_base[0] -= i;                                       \
2053     for (j = 0; j < i; j += 2)                                  \
2054       *charbuf++ = components[j];                               \
2055   } while (0)
2056
2057
2058 static void
2059 decode_coding_emacs_mule (coding)
2060      struct coding_system *coding;
2061 {
2062   const unsigned char *src = coding->source + coding->consumed;
2063   const unsigned char *src_end = coding->source + coding->src_bytes;
2064   const unsigned char *src_base;
2065   int *charbuf = coding->charbuf + coding->charbuf_used;
2066   int *charbuf_end
2067     = coding->charbuf + coding->charbuf_size - MAX_ANNOTATION_LENGTH;
2068   int consumed_chars = 0, consumed_chars_base;
2069   int multibytep = coding->src_multibyte;
2070   Lisp_Object attrs, charset_list;
2071   int char_offset = coding->produced_char;
2072   int last_offset = char_offset;
2073   int last_id = charset_ascii;
2074
2075   CODING_GET_INFO (coding, attrs, charset_list);
2076
2077   while (1)
2078     {
2079       int c;
2080
2081       src_base = src;
2082       consumed_chars_base = consumed_chars;
2083
2084       if (charbuf >= charbuf_end)
2085         break;
2086
2087       ONE_MORE_BYTE (c);
2088       if (c < 0)
2089         {
2090           *charbuf++ = -c;
2091           char_offset++;
2092         }
2093       else if (c < 0x80)
2094         {
2095           *charbuf++ = c;
2096           char_offset++;
2097         }
2098       else if (c == 0x80)
2099         {
2100           ONE_MORE_BYTE (c);
2101           if (c < 0)
2102             goto invalid_code;
2103           if (c - 0xF2 >= COMPOSITION_RELATIVE
2104               && c - 0xF2 <= COMPOSITION_WITH_RULE_ALTCHARS)
2105             DECODE_EMACS_MULE_21_COMPOSITION (c);
2106           else if (c < 0xC0)
2107             DECODE_EMACS_MULE_20_RELATIVE_COMPOSITION (c);
2108           else if (c == 0xFF)
2109             DECODE_EMACS_MULE_20_RULEBASE_COMPOSITION (c);
2110           else
2111             goto invalid_code;
2112         }
2113       else if (c < 0xA0 && emacs_mule_bytes[c] > 1)
2114         {
2115           int nbytes, nchars;
2116           int id;
2117
2118           src = src_base;
2119           consumed_chars = consumed_chars_base;
2120           c = emacs_mule_char (coding, src, &nbytes, &nchars, &id);
2121           if (c < 0)
2122             {
2123               if (c == -2)
2124                 break;
2125               goto invalid_code;
2126             }
2127           if (last_id != id)
2128             {
2129               if (last_id != charset_ascii)
2130                 ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
2131               last_id = id;
2132               last_offset = char_offset;
2133             }
2134           *charbuf++ = c;
2135           src += nbytes;
2136           consumed_chars += nchars;
2137           char_offset++;
2138         }
2139       else
2140         goto invalid_code;
2141       continue;
2142
2143     invalid_code:
2144       src = src_base;
2145       consumed_chars = consumed_chars_base;
2146       ONE_MORE_BYTE (c);
2147       *charbuf++ = ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
2148       char_offset++;
2149       coding->errors++;
2150     }
2151
2152  no_more_source:
2153   if (last_id != charset_ascii)
2154     ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
2155   coding->consumed_char += consumed_chars_base;
2156   coding->consumed = src_base - coding->source;
2157   coding->charbuf_used = charbuf - coding->charbuf;
2158 }
2159
2160
2161 #define EMACS_MULE_LEADING_CODES(id, codes)     \
2162   do {                                          \
2163     if (id < 0xA0)                              \
2164       codes[0] = id, codes[1] = 0;              \
2165     else if (id < 0xE0)                         \
2166       codes[0] = 0x9A, codes[1] = id;           \
2167     else if (id < 0xF0)                         \
2168       codes[0] = 0x9B, codes[1] = id;           \
2169     else if (id < 0xF5)                         \
2170       codes[0] = 0x9C, codes[1] = id;           \
2171     else                                        \
2172       codes[0] = 0x9D, codes[1] = id;           \
2173   } while (0);
2174
2175
2176 static int
2177 encode_coding_emacs_mule (coding)
2178      struct coding_system *coding;
2179 {
2180   int multibytep = coding->dst_multibyte;
2181   int *charbuf = coding->charbuf;
2182   int *charbuf_end = charbuf + coding->charbuf_used;
2183   unsigned char *dst = coding->destination + coding->produced;
2184   unsigned char *dst_end = coding->destination + coding->dst_bytes;
2185   int safe_room = 8;
2186   int produced_chars = 0;
2187   Lisp_Object attrs, charset_list;
2188   int c;
2189   int preferred_charset_id = -1;
2190
2191   CODING_GET_INFO (coding, attrs, charset_list);
2192   if (! EQ (charset_list, Vemacs_mule_charset_list))
2193     {
2194       CODING_ATTR_CHARSET_LIST (attrs)
2195         = charset_list = Vemacs_mule_charset_list;
2196     }
2197
2198   while (charbuf < charbuf_end)
2199     {
2200       ASSURE_DESTINATION (safe_room);
2201       c = *charbuf++;
2202
2203       if (c < 0)
2204         {
2205           /* Handle an annotation.  */
2206           switch (*charbuf)
2207             {
2208             case CODING_ANNOTATE_COMPOSITION_MASK:
2209               /* Not yet implemented.  */
2210               break;
2211             case CODING_ANNOTATE_CHARSET_MASK:
2212               preferred_charset_id = charbuf[3];
2213               if (preferred_charset_id >= 0
2214                   && NILP (Fmemq (make_number (preferred_charset_id),
2215                                   charset_list)))
2216                 preferred_charset_id = -1;
2217               break;
2218             default:
2219               abort ();
2220             }
2221           charbuf += -c - 1;
2222           continue;
2223         }
2224
2225       if (ASCII_CHAR_P (c))
2226         EMIT_ONE_ASCII_BYTE (c);
2227       else if (CHAR_BYTE8_P (c))
2228         {
2229           c = CHAR_TO_BYTE8 (c);
2230           EMIT_ONE_BYTE (c);
2231         }
2232       else
2233         {
2234           struct charset *charset;
2235           unsigned code;
2236           int dimension;
2237           int emacs_mule_id;
2238           unsigned char leading_codes[2];
2239
2240           if (preferred_charset_id >= 0)
2241             {
2242               charset = CHARSET_FROM_ID (preferred_charset_id);
2243               if (! CHAR_CHARSET_P (c, charset))
2244                 charset = char_charset (c, charset_list, NULL);
2245             }
2246           else
2247             charset = char_charset (c, charset_list, &code);
2248           if (! charset)
2249             {
2250               c = coding->default_char;
2251               if (ASCII_CHAR_P (c))
2252                 {
2253                   EMIT_ONE_ASCII_BYTE (c);
2254                   continue;
2255                 }
2256               charset = char_charset (c, charset_list, &code);
2257             }
2258           dimension = CHARSET_DIMENSION (charset);
2259           emacs_mule_id = CHARSET_EMACS_MULE_ID (charset);
2260           EMACS_MULE_LEADING_CODES (emacs_mule_id, leading_codes);
2261           EMIT_ONE_BYTE (leading_codes[0]);
2262           if (leading_codes[1])
2263             EMIT_ONE_BYTE (leading_codes[1]);
2264           if (dimension == 1)
2265             EMIT_ONE_BYTE (code | 0x80);
2266           else
2267             {
2268               code |= 0x8080;
2269               EMIT_ONE_BYTE (code >> 8);
2270               EMIT_ONE_BYTE (code & 0xFF);
2271             }
2272         }
2273     }
2274   record_conversion_result (coding, CODING_RESULT_SUCCESS);
2275   coding->produced_char += produced_chars;
2276   coding->produced = dst - coding->destination;
2277   return 0;
2278 }
2279
2280 \f
2281 /*** 7. ISO2022 handlers ***/
2282
2283 /* The following note describes the coding system ISO2022 briefly.
2284    Since the intention of this note is to help understand the
2285    functions in this file, some parts are NOT ACCURATE or are OVERLY
2286    SIMPLIFIED.  For thorough understanding, please refer to the
2287    original document of ISO2022.  This is equivalent to the standard
2288    ECMA-35, obtainable from <URL:http://www.ecma.ch/> (*).
2289
2290    ISO2022 provides many mechanisms to encode several character sets
2291    in 7-bit and 8-bit environments.  For 7-bit environments, all text
2292    is encoded using bytes less than 128.  This may make the encoded
2293    text a little bit longer, but the text passes more easily through
2294    several types of gateway, some of which strip off the MSB (Most
2295    Significant Bit).
2296
2297    There are two kinds of character sets: control character sets and
2298    graphic character sets.  The former contain control characters such
2299    as `newline' and `escape' to provide control functions (control
2300    functions are also provided by escape sequences).  The latter
2301    contain graphic characters such as 'A' and '-'.  Emacs recognizes
2302    two control character sets and many graphic character sets.
2303
2304    Graphic character sets are classified into one of the following
2305    four classes, according to the number of bytes (DIMENSION) and
2306    number of characters in one dimension (CHARS) of the set:
2307    - DIMENSION1_CHARS94
2308    - DIMENSION1_CHARS96
2309    - DIMENSION2_CHARS94
2310    - DIMENSION2_CHARS96
2311
2312    In addition, each character set is assigned an identification tag,
2313    unique for each set, called the "final character" (denoted as <F>
2314    hereafter).  The <F> of each character set is decided by ECMA(*)
2315    when it is registered in ISO.  The code range of <F> is 0x30..0x7F
2316    (0x30..0x3F are for private use only).
2317
2318    Note (*): ECMA = European Computer Manufacturers Association
2319
2320    Here are examples of graphic character sets [NAME(<F>)]:
2321         o DIMENSION1_CHARS94 -- ASCII('B'), right-half-of-JISX0201('I'), ...
2322         o DIMENSION1_CHARS96 -- right-half-of-ISO8859-1('A'), ...
2323         o DIMENSION2_CHARS94 -- GB2312('A'), JISX0208('B'), ...
2324         o DIMENSION2_CHARS96 -- none for the moment
2325
2326    A code area (1 byte=8 bits) is divided into 4 areas, C0, GL, C1, and GR.
2327         C0 [0x00..0x1F] -- control character plane 0
2328         GL [0x20..0x7F] -- graphic character plane 0
2329         C1 [0x80..0x9F] -- control character plane 1
2330         GR [0xA0..0xFF] -- graphic character plane 1
2331
2332    A control character set is directly designated and invoked to C0 or
2333    C1 by an escape sequence.  The most common case is that:
2334    - ISO646's  control character set is designated/invoked to C0, and
2335    - ISO6429's control character set is designated/invoked to C1,
2336    and usually these designations/invocations are omitted in encoded
2337    text.  In a 7-bit environment, only C0 can be used, and a control
2338    character for C1 is encoded by an appropriate escape sequence to
2339    fit into the environment.  All control characters for C1 are
2340    defined to have corresponding escape sequences.
2341
2342    A graphic character set is at first designated to one of four
2343    graphic registers (G0 through G3), then these graphic registers are
2344    invoked to GL or GR.  These designations and invocations can be
2345    done independently.  The most common case is that G0 is invoked to
2346    GL, G1 is invoked to GR, and ASCII is designated to G0.  Usually
2347    these invocations and designations are omitted in encoded text.
2348    In a 7-bit environment, only GL can be used.
2349
2350    When a graphic character set of CHARS94 is invoked to GL, codes
2351    0x20 and 0x7F of the GL area work as control characters SPACE and
2352    DEL respectively, and codes 0xA0 and 0xFF of the GR area should not
2353    be used.
2354
2355    There are two ways of invocation: locking-shift and single-shift.
2356    With locking-shift, the invocation lasts until the next different
2357    invocation, whereas with single-shift, the invocation affects the
2358    following character only and doesn't affect the locking-shift
2359    state.  Invocations are done by the following control characters or
2360    escape sequences:
2361
2362    ----------------------------------------------------------------------
2363    abbrev  function                  cntrl escape seq   description
2364    ----------------------------------------------------------------------
2365    SI/LS0  (shift-in)                0x0F  none         invoke G0 into GL
2366    SO/LS1  (shift-out)               0x0E  none         invoke G1 into GL
2367    LS2     (locking-shift-2)         none  ESC 'n'      invoke G2 into GL
2368    LS3     (locking-shift-3)         none  ESC 'o'      invoke G3 into GL
2369    LS1R    (locking-shift-1 right)   none  ESC '~'      invoke G1 into GR (*)
2370    LS2R    (locking-shift-2 right)   none  ESC '}'      invoke G2 into GR (*)
2371    LS3R    (locking-shift 3 right)   none  ESC '|'      invoke G3 into GR (*)
2372    SS2     (single-shift-2)          0x8E  ESC 'N'      invoke G2 for one char
2373    SS3     (single-shift-3)          0x8F  ESC 'O'      invoke G3 for one char
2374    ----------------------------------------------------------------------
2375    (*) These are not used by any known coding system.
2376
2377    Control characters for these functions are defined by macros
2378    ISO_CODE_XXX in `coding.h'.
2379
2380    Designations are done by the following escape sequences:
2381    ----------------------------------------------------------------------
2382    escape sequence      description
2383    ----------------------------------------------------------------------
2384    ESC '(' <F>          designate DIMENSION1_CHARS94<F> to G0
2385    ESC ')' <F>          designate DIMENSION1_CHARS94<F> to G1
2386    ESC '*' <F>          designate DIMENSION1_CHARS94<F> to G2
2387    ESC '+' <F>          designate DIMENSION1_CHARS94<F> to G3
2388    ESC ',' <F>          designate DIMENSION1_CHARS96<F> to G0 (*)
2389    ESC '-' <F>          designate DIMENSION1_CHARS96<F> to G1
2390    ESC '.' <F>          designate DIMENSION1_CHARS96<F> to G2
2391    ESC '/' <F>          designate DIMENSION1_CHARS96<F> to G3
2392    ESC '$' '(' <F>      designate DIMENSION2_CHARS94<F> to G0 (**)
2393    ESC '$' ')' <F>      designate DIMENSION2_CHARS94<F> to G1
2394    ESC '$' '*' <F>      designate DIMENSION2_CHARS94<F> to G2
2395    ESC '$' '+' <F>      designate DIMENSION2_CHARS94<F> to G3
2396    ESC '$' ',' <F>      designate DIMENSION2_CHARS96<F> to G0 (*)
2397    ESC '$' '-' <F>      designate DIMENSION2_CHARS96<F> to G1
2398    ESC '$' '.' <F>      designate DIMENSION2_CHARS96<F> to G2
2399    ESC '$' '/' <F>      designate DIMENSION2_CHARS96<F> to G3
2400    ----------------------------------------------------------------------
2401
2402    In this list, "DIMENSION1_CHARS94<F>" means a graphic character set
2403    of dimension 1, chars 94, and final character <F>, etc...
2404
2405    Note (*): Although these designations are not allowed in ISO2022,
2406    Emacs accepts them on decoding, and produces them on encoding
2407    CHARS96 character sets in a coding system which is characterized as
2408    7-bit environment, non-locking-shift, and non-single-shift.
2409
2410    Note (**): If <F> is '@', 'A', or 'B', the intermediate character
2411    '(' must be omitted.  We refer to this as "short-form" hereafter.
2412
2413    Now you may notice that there are a lot of ways of encoding the
2414    same multilingual text in ISO2022.  Actually, there exist many
2415    coding systems such as Compound Text (used in X11's inter client
2416    communication, ISO-2022-JP (used in Japanese Internet), ISO-2022-KR
2417    (used in Korean Internet), EUC (Extended UNIX Code, used in Asian
2418    localized platforms), and all of these are variants of ISO2022.
2419
2420    In addition to the above, Emacs handles two more kinds of escape
2421    sequences: ISO6429's direction specification and Emacs' private
2422    sequence for specifying character composition.
2423
2424    ISO6429's direction specification takes the following form:
2425         o CSI ']'      -- end of the current direction
2426         o CSI '0' ']'  -- end of the current direction
2427         o CSI '1' ']'  -- start of left-to-right text
2428         o CSI '2' ']'  -- start of right-to-left text
2429    The control character CSI (0x9B: control sequence introducer) is
2430    abbreviated to the escape sequence ESC '[' in a 7-bit environment.
2431
2432    Character composition specification takes the following form:
2433         o ESC '0' -- start relative composition
2434         o ESC '1' -- end composition
2435         o ESC '2' -- start rule-base composition (*)
2436         o ESC '3' -- start relative composition with alternate chars  (**)
2437         o ESC '4' -- start rule-base composition with alternate chars  (**)
2438   Since these are not standard escape sequences of any ISO standard,
2439   the use of them with these meanings is restricted to Emacs only.
2440
2441   (*) This form is used only in Emacs 20.7 and older versions,
2442   but newer versions can safely decode it.
2443   (**) This form is used only in Emacs 21.1 and newer versions,
2444   and older versions can't decode it.
2445
2446   Here's a list of example usages of these composition escape
2447   sequences (categorized by `enum composition_method').
2448
2449   COMPOSITION_RELATIVE:
2450         ESC 0 CHAR [ CHAR ] ESC 1
2451   COMPOSITION_WITH_RULE:
2452         ESC 2 CHAR [ RULE CHAR ] ESC 1
2453   COMPOSITION_WITH_ALTCHARS:
2454         ESC 3 ALTCHAR [ ALTCHAR ] ESC 0 CHAR [ CHAR ] ESC 1
2455   COMPOSITION_WITH_RULE_ALTCHARS:
2456         ESC 4 ALTCHAR [ RULE ALTCHAR ] ESC 0 CHAR [ CHAR ] ESC 1 */
2457
2458 enum iso_code_class_type iso_code_class[256];
2459
2460 #define SAFE_CHARSET_P(coding, id)      \
2461   ((id) <= (coding)->max_charset_id     \
2462    && (coding)->safe_charsets[id] >= 0)
2463
2464
2465 #define SHIFT_OUT_OK(category)  \
2466   (CODING_ISO_INITIAL (&coding_categories[category], 1) >= 0)
2467
2468 static void
2469 setup_iso_safe_charsets (attrs)
2470      Lisp_Object attrs;
2471 {
2472   Lisp_Object charset_list, safe_charsets;
2473   Lisp_Object request;
2474   Lisp_Object reg_usage;
2475   Lisp_Object tail;
2476   int reg94, reg96;
2477   int flags = XINT (AREF (attrs, coding_attr_iso_flags));
2478   int max_charset_id;
2479
2480   charset_list = CODING_ATTR_CHARSET_LIST (attrs);
2481   if ((flags & CODING_ISO_FLAG_FULL_SUPPORT)
2482       && ! EQ (charset_list, Viso_2022_charset_list))
2483     {
2484       CODING_ATTR_CHARSET_LIST (attrs)
2485         = charset_list = Viso_2022_charset_list;
2486       ASET (attrs, coding_attr_safe_charsets, Qnil);
2487     }
2488
2489   if (STRINGP (AREF (attrs, coding_attr_safe_charsets)))
2490     return;
2491
2492   max_charset_id = 0;
2493   for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
2494     {
2495       int id = XINT (XCAR (tail));
2496       if (max_charset_id < id)
2497         max_charset_id = id;
2498     }
2499
2500   safe_charsets = Fmake_string (make_number (max_charset_id + 1),
2501                                 make_number (255));
2502   request = AREF (attrs, coding_attr_iso_request);
2503   reg_usage = AREF (attrs, coding_attr_iso_usage);
2504   reg94 = XINT (XCAR (reg_usage));
2505   reg96 = XINT (XCDR (reg_usage));
2506
2507   for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
2508     {
2509       Lisp_Object id;
2510       Lisp_Object reg;
2511       struct charset *charset;
2512
2513       id = XCAR (tail);
2514       charset = CHARSET_FROM_ID (XINT (id));
2515       reg = Fcdr (Fassq (id, request));
2516       if (! NILP (reg))
2517         SSET (safe_charsets, XINT (id), XINT (reg));
2518       else if (charset->iso_chars_96)
2519         {
2520           if (reg96 < 4)
2521             SSET (safe_charsets, XINT (id), reg96);
2522         }
2523       else
2524         {
2525           if (reg94 < 4)
2526             SSET (safe_charsets, XINT (id), reg94);
2527         }
2528     }
2529   ASET (attrs, coding_attr_safe_charsets, safe_charsets);
2530 }
2531
2532
2533 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
2534    Check if a text is encoded in one of ISO-2022 based codig systems.
2535    If it is, return 1, else return 0.  */
2536
2537 static int
2538 detect_coding_iso_2022 (coding, detect_info)
2539      struct coding_system *coding;
2540      struct coding_detection_info *detect_info;
2541 {
2542   const unsigned char *src = coding->source, *src_base = src;
2543   const unsigned char *src_end = coding->source + coding->src_bytes;
2544   int multibytep = coding->src_multibyte;
2545   int single_shifting = 0;
2546   int id;
2547   int c, c1;
2548   int consumed_chars = 0;
2549   int i;
2550   int rejected = 0;
2551   int found = 0;
2552
2553   detect_info->checked |= CATEGORY_MASK_ISO;
2554
2555   for (i = coding_category_iso_7; i <= coding_category_iso_8_else; i++)
2556     {
2557       struct coding_system *this = &(coding_categories[i]);
2558       Lisp_Object attrs, val;
2559
2560       attrs = CODING_ID_ATTRS (this->id);
2561       if (CODING_ISO_FLAGS (this) & CODING_ISO_FLAG_FULL_SUPPORT
2562           && ! EQ (CODING_ATTR_SAFE_CHARSETS (attrs), Viso_2022_charset_list))
2563         setup_iso_safe_charsets (attrs);
2564       val = CODING_ATTR_SAFE_CHARSETS (attrs);
2565       this->max_charset_id = SCHARS (val) - 1;
2566       this->safe_charsets = (char *) SDATA (val);
2567     }
2568
2569   /* A coding system of this category is always ASCII compatible.  */
2570   src += coding->head_ascii;
2571
2572   while (rejected != CATEGORY_MASK_ISO)
2573     {
2574       src_base = src;
2575       ONE_MORE_BYTE (c);
2576       switch (c)
2577         {
2578         case ISO_CODE_ESC:
2579           if (inhibit_iso_escape_detection)
2580             break;
2581           single_shifting = 0;
2582           ONE_MORE_BYTE (c);
2583           if (c >= '(' && c <= '/')
2584             {
2585               /* Designation sequence for a charset of dimension 1.  */
2586               ONE_MORE_BYTE (c1);
2587               if (c1 < ' ' || c1 >= 0x80
2588                   || (id = iso_charset_table[0][c >= ','][c1]) < 0)
2589                 /* Invalid designation sequence.  Just ignore.  */
2590                 break;
2591             }
2592           else if (c == '$')
2593             {
2594               /* Designation sequence for a charset of dimension 2.  */
2595               ONE_MORE_BYTE (c);
2596               if (c >= '@' && c <= 'B')
2597                 /* Designation for JISX0208.1978, GB2312, or JISX0208.  */
2598                 id = iso_charset_table[1][0][c];
2599               else if (c >= '(' && c <= '/')
2600                 {
2601                   ONE_MORE_BYTE (c1);
2602                   if (c1 < ' ' || c1 >= 0x80
2603                       || (id = iso_charset_table[1][c >= ','][c1]) < 0)
2604                     /* Invalid designation sequence.  Just ignore.  */
2605                     break;
2606                 }
2607               else
2608                 /* Invalid designation sequence.  Just ignore it.  */
2609                 break;
2610             }
2611           else if (c == 'N' || c == 'O')
2612             {
2613               /* ESC <Fe> for SS2 or SS3.  */
2614               single_shifting = 1;
2615               rejected |= CATEGORY_MASK_ISO_7BIT | CATEGORY_MASK_ISO_8BIT;
2616               break;
2617             }
2618           else if (c >= '0' && c <= '4')
2619             {
2620               /* ESC <Fp> for start/end composition.  */
2621               found |= CATEGORY_MASK_ISO;
2622               break;
2623             }
2624           else
2625             {
2626               /* Invalid escape sequence.  Just ignore it.  */
2627               break;
2628             }
2629
2630           /* We found a valid designation sequence for CHARSET.  */
2631           rejected |= CATEGORY_MASK_ISO_8BIT;
2632           if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_7],
2633                               id))
2634             found |= CATEGORY_MASK_ISO_7;
2635           else
2636             rejected |= CATEGORY_MASK_ISO_7;
2637           if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_7_tight],
2638                               id))
2639             found |= CATEGORY_MASK_ISO_7_TIGHT;
2640           else
2641             rejected |= CATEGORY_MASK_ISO_7_TIGHT;
2642           if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_7_else],
2643                               id))
2644             found |= CATEGORY_MASK_ISO_7_ELSE;
2645           else
2646             rejected |= CATEGORY_MASK_ISO_7_ELSE;
2647           if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_8_else],
2648                               id))
2649             found |= CATEGORY_MASK_ISO_8_ELSE;
2650           else
2651             rejected |= CATEGORY_MASK_ISO_8_ELSE;
2652           break;
2653
2654         case ISO_CODE_SO:
2655         case ISO_CODE_SI:
2656           /* Locking shift out/in.  */
2657           if (inhibit_iso_escape_detection)
2658             break;
2659           single_shifting = 0;
2660           rejected |= CATEGORY_MASK_ISO_7BIT | CATEGORY_MASK_ISO_8BIT;
2661           found |= CATEGORY_MASK_ISO_ELSE;
2662           break;
2663
2664         case ISO_CODE_CSI:
2665           /* Control sequence introducer.  */
2666           single_shifting = 0;
2667           rejected |= CATEGORY_MASK_ISO_7BIT | CATEGORY_MASK_ISO_7_ELSE;
2668           found |= CATEGORY_MASK_ISO_8_ELSE;
2669           goto check_extra_latin;
2670
2671         case ISO_CODE_SS2:
2672         case ISO_CODE_SS3:
2673           /* Single shift.   */
2674           if (inhibit_iso_escape_detection)
2675             break;
2676           single_shifting = 0;
2677           rejected |= CATEGORY_MASK_ISO_7BIT;
2678           if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_1])
2679               & CODING_ISO_FLAG_SINGLE_SHIFT)
2680             found |= CATEGORY_MASK_ISO_8_1, single_shifting = 1;
2681           if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_2])
2682               & CODING_ISO_FLAG_SINGLE_SHIFT)
2683             found |= CATEGORY_MASK_ISO_8_2, single_shifting = 1;
2684           if (single_shifting)
2685             break;
2686           goto check_extra_latin;
2687
2688         default:
2689           if (c < 0)
2690             continue;
2691           if (c < 0x80)
2692             {
2693               single_shifting = 0;
2694               break;
2695             }
2696           if (c >= 0xA0)
2697             {
2698               rejected |= CATEGORY_MASK_ISO_7BIT | CATEGORY_MASK_ISO_7_ELSE;
2699               found |= CATEGORY_MASK_ISO_8_1;
2700               /* Check the length of succeeding codes of the range
2701                  0xA0..0FF.  If the byte length is even, we include
2702                  CATEGORY_MASK_ISO_8_2 in `found'.  We can check this
2703                  only when we are not single shifting.  */
2704               if (! single_shifting
2705                   && ! (rejected & CATEGORY_MASK_ISO_8_2))
2706                 {
2707                   int i = 1;
2708                   while (src < src_end)
2709                     {
2710                       ONE_MORE_BYTE (c);
2711                       if (c < 0xA0)
2712                         break;
2713                       i++;
2714                     }
2715
2716                   if (i & 1 && src < src_end)
2717                     rejected |= CATEGORY_MASK_ISO_8_2;
2718                   else
2719                     found |= CATEGORY_MASK_ISO_8_2;
2720                 }
2721               break;
2722             }
2723         check_extra_latin:
2724           single_shifting = 0;
2725           if (! VECTORP (Vlatin_extra_code_table)
2726               || NILP (XVECTOR (Vlatin_extra_code_table)->contents[c]))
2727             {
2728               rejected = CATEGORY_MASK_ISO;
2729               break;
2730             }
2731           if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_1])
2732               & CODING_ISO_FLAG_LATIN_EXTRA)
2733             found |= CATEGORY_MASK_ISO_8_1;
2734           else
2735             rejected |= CATEGORY_MASK_ISO_8_1;
2736           rejected |= CATEGORY_MASK_ISO_8_2;
2737         }
2738     }
2739   detect_info->rejected |= CATEGORY_MASK_ISO;
2740   return 0;
2741
2742  no_more_source:
2743   detect_info->rejected |= rejected;
2744   detect_info->found |= (found & ~rejected);
2745   return 1;
2746 }
2747
2748
2749 /* Set designation state into CODING.  Set CHARS_96 to -1 if the
2750    escape sequence should be kept.  */
2751 #define DECODE_DESIGNATION(reg, dim, chars_96, final)                   \
2752   do {                                                                  \
2753     int id, prev;                                                       \
2754                                                                         \
2755     if (final < '0' || final >= 128                                     \
2756         || ((id = ISO_CHARSET_TABLE (dim, chars_96, final)) < 0)        \
2757         || !SAFE_CHARSET_P (coding, id))                                \
2758       {                                                                 \
2759         CODING_ISO_DESIGNATION (coding, reg) = -2;                      \
2760         chars_96 = -1;                                                  \
2761         break;                                                          \
2762       }                                                                 \
2763     prev = CODING_ISO_DESIGNATION (coding, reg);                        \
2764     if (id == charset_jisx0201_roman)                                   \
2765       {                                                                 \
2766         if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_USE_ROMAN)      \
2767           id = charset_ascii;                                           \
2768       }                                                                 \
2769     else if (id == charset_jisx0208_1978)                               \
2770       {                                                                 \
2771         if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_USE_OLDJIS)     \
2772           id = charset_jisx0208;                                        \
2773       }                                                                 \
2774     CODING_ISO_DESIGNATION (coding, reg) = id;                          \
2775     /* If there was an invalid designation to REG previously, and this  \
2776        designation is ASCII to REG, we should keep this designation     \
2777        sequence.  */                                                    \
2778     if (prev == -2 && id == charset_ascii)                              \
2779       chars_96 = -1;                                                    \
2780   } while (0)
2781
2782
2783 #define MAYBE_FINISH_COMPOSITION()                              \
2784   do {                                                          \
2785     int i;                                                      \
2786     if (composition_state == COMPOSING_NO)                      \
2787       break;                                                    \
2788     /* It is assured that we have enough room for producing     \
2789        characters stored in the table `components'.  */         \
2790     if (charbuf + component_idx > charbuf_end)                  \
2791       goto no_more_source;                                      \
2792     composition_state = COMPOSING_NO;                           \
2793     if (method == COMPOSITION_RELATIVE                          \
2794         || method == COMPOSITION_WITH_ALTCHARS)                 \
2795       {                                                         \
2796         for (i = 0; i < component_idx; i++)                     \
2797           *charbuf++ = components[i];                           \
2798         char_offset += component_idx;                           \
2799       }                                                         \
2800     else                                                        \
2801       {                                                         \
2802         for (i = 0; i < component_idx; i += 2)                  \
2803           *charbuf++ = components[i];                           \
2804         char_offset += (component_idx / 2) + 1;                 \
2805       }                                                         \
2806   } while (0)
2807
2808
2809 /* Handle composition start sequence ESC 0, ESC 2, ESC 3, or ESC 4.
2810    ESC 0 : relative composition : ESC 0 CHAR ... ESC 1
2811    ESC 2 : rulebase composition : ESC 2 CHAR RULE CHAR RULE ... CHAR ESC 1
2812    ESC 3 : altchar composition :  ESC 3 CHAR ... ESC 0 CHAR ... ESC 1
2813    ESC 4 : alt&rule composition : ESC 4 CHAR RULE ... CHAR ESC 0 CHAR ... ESC 1
2814   */
2815
2816 #define DECODE_COMPOSITION_START(c1)                                    \
2817   do {                                                                  \
2818     if (c1 == '0'                                                       \
2819         && composition_state == COMPOSING_COMPONENT_RULE)               \
2820       {                                                                 \
2821         component_len = component_idx;                                  \
2822         composition_state = COMPOSING_CHAR;                             \
2823       }                                                                 \
2824     else                                                                \
2825       {                                                                 \
2826         const unsigned char *p;                                         \
2827                                                                         \
2828         MAYBE_FINISH_COMPOSITION ();                                    \
2829         if (charbuf + MAX_COMPOSITION_COMPONENTS > charbuf_end)         \
2830           goto no_more_source;                                          \
2831         for (p = src; p < src_end - 1; p++)                             \
2832           if (*p == ISO_CODE_ESC && p[1] == '1')                        \
2833             break;                                                      \
2834         if (p == src_end - 1)                                           \
2835           {                                                             \
2836             /* The current composition doesn't end in the current       \
2837                source.  */                                              \
2838             record_conversion_result                                    \
2839               (coding, CODING_RESULT_INSUFFICIENT_SRC);                 \
2840             goto no_more_source;                                        \
2841           }                                                             \
2842                                                                         \
2843         /* This is surely the start of a composition.  */               \
2844         method = (c1 == '0' ? COMPOSITION_RELATIVE                      \
2845                   : c1 == '2' ? COMPOSITION_WITH_RULE                   \
2846                   : c1 == '3' ? COMPOSITION_WITH_ALTCHARS               \
2847                   : COMPOSITION_WITH_RULE_ALTCHARS);                    \
2848         composition_state = (c1 <= '2' ? COMPOSING_CHAR                 \
2849                              : COMPOSING_COMPONENT_CHAR);               \
2850         component_idx = component_len = 0;                              \
2851       }                                                                 \
2852   } while (0)
2853
2854
2855 /* Handle compositoin end sequence ESC 1.  */
2856
2857 #define DECODE_COMPOSITION_END()                                        \
2858   do {                                                                  \
2859     int nchars = (component_len > 0 ? component_idx - component_len     \
2860                   : method == COMPOSITION_RELATIVE ? component_idx      \
2861                   : (component_idx + 1) / 2);                           \
2862     int i;                                                              \
2863     int *saved_charbuf = charbuf;                                       \
2864                                                                         \
2865     ADD_COMPOSITION_DATA (charbuf, nchars, method);                     \
2866     if (method != COMPOSITION_RELATIVE)                                 \
2867       {                                                                 \
2868         if (component_len == 0)                                         \
2869           for (i = 0; i < component_idx; i++)                           \
2870             *charbuf++ = components[i];                                 \
2871         else                                                            \
2872           for (i = 0; i < component_len; i++)                           \
2873             *charbuf++ = components[i];                                 \
2874         *saved_charbuf = saved_charbuf - charbuf;                       \
2875       }                                                                 \
2876     if (method == COMPOSITION_WITH_RULE)                                \
2877       for (i = 0; i < component_idx; i += 2, char_offset++)             \
2878         *charbuf++ = components[i];                                     \
2879     else                                                                \
2880       for (i = component_len; i < component_idx; i++, char_offset++)    \
2881         *charbuf++ = components[i];                                     \
2882     coding->annotated = 1;                                              \
2883     composition_state = COMPOSING_NO;                                   \
2884   } while (0)
2885
2886
2887 /* Decode a composition rule from the byte C1 (and maybe one more byte
2888    from SRC) and store one encoded composition rule in
2889    coding->cmp_data.  */
2890
2891 #define DECODE_COMPOSITION_RULE(c1)                                     \
2892   do {                                                                  \
2893     (c1) -= 32;                                                         \
2894     if (c1 < 81)                /* old format (before ver.21) */        \
2895       {                                                                 \
2896         int gref = (c1) / 9;                                            \
2897         int nref = (c1) % 9;                                            \
2898         if (gref == 4) gref = 10;                                       \
2899         if (nref == 4) nref = 10;                                       \
2900         c1 = COMPOSITION_ENCODE_RULE (gref, nref);                      \
2901       }                                                                 \
2902     else if (c1 < 93)           /* new format (after ver.21) */         \
2903       {                                                                 \
2904         ONE_MORE_BYTE (c2);                                             \
2905         c1 = COMPOSITION_ENCODE_RULE (c1 - 81, c2 - 32);                \
2906       }                                                                 \
2907     else                                                                \
2908       c1 = 0;                                                           \
2909   } while (0)
2910
2911
2912 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".  */
2913
2914 static void
2915 decode_coding_iso_2022 (coding)
2916      struct coding_system *coding;
2917 {
2918   const unsigned char *src = coding->source + coding->consumed;
2919   const unsigned char *src_end = coding->source + coding->src_bytes;
2920   const unsigned char *src_base;
2921   int *charbuf = coding->charbuf + coding->charbuf_used;
2922   int *charbuf_end
2923     = coding->charbuf + coding->charbuf_size - 4 - MAX_ANNOTATION_LENGTH;
2924   int consumed_chars = 0, consumed_chars_base;
2925   int multibytep = coding->src_multibyte;
2926   /* Charsets invoked to graphic plane 0 and 1 respectively.  */
2927   int charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
2928   int charset_id_1 = CODING_ISO_INVOKED_CHARSET (coding, 1);
2929   int charset_id_2, charset_id_3;
2930   struct charset *charset;
2931   int c;
2932   /* For handling composition sequence.  */
2933 #define COMPOSING_NO                    0
2934 #define COMPOSING_CHAR                  1
2935 #define COMPOSING_RULE                  2
2936 #define COMPOSING_COMPONENT_CHAR        3
2937 #define COMPOSING_COMPONENT_RULE        4
2938
2939   int composition_state = COMPOSING_NO;
2940   enum composition_method method;
2941   int components[MAX_COMPOSITION_COMPONENTS * 2 + 1];
2942   int component_idx;
2943   int component_len;
2944   Lisp_Object attrs, charset_list;
2945   int char_offset = coding->produced_char;
2946   int last_offset = char_offset;
2947   int last_id = charset_ascii;
2948
2949   CODING_GET_INFO (coding, attrs, charset_list);
2950   setup_iso_safe_charsets (attrs);
2951
2952   while (1)
2953     {
2954       int c1, c2;
2955
2956       src_base = src;
2957       consumed_chars_base = consumed_chars;
2958
2959       if (charbuf >= charbuf_end)
2960         break;
2961
2962       ONE_MORE_BYTE (c1);
2963       if (c1 < 0)
2964         goto invalid_code;
2965
2966       /* We produce at most one character.  */
2967       switch (iso_code_class [c1])
2968         {
2969         case ISO_0x20_or_0x7F:
2970           if (composition_state != COMPOSING_NO)
2971             {
2972               if (composition_state == COMPOSING_RULE
2973                   || composition_state == COMPOSING_COMPONENT_RULE)
2974                 {
2975                   DECODE_COMPOSITION_RULE (c1);
2976                   components[component_idx++] = c1;
2977                   composition_state--;
2978                   continue;
2979                 }
2980             }
2981           if (charset_id_0 < 0
2982               || ! CHARSET_ISO_CHARS_96 (CHARSET_FROM_ID (charset_id_0)))
2983             /* This is SPACE or DEL.  */
2984             charset = CHARSET_FROM_ID (charset_ascii);
2985           else
2986             charset = CHARSET_FROM_ID (charset_id_0);
2987           break;
2988
2989         case ISO_graphic_plane_0:
2990           if (composition_state != COMPOSING_NO)
2991             {
2992               if (composition_state == COMPOSING_RULE
2993                   || composition_state == COMPOSING_COMPONENT_RULE)
2994                 {
2995                   DECODE_COMPOSITION_RULE (c1);
2996                   components[component_idx++] = c1;
2997                   composition_state--;
2998                   continue;
2999                 }
3000             }
3001           if (charset_id_0 < 0)
3002             charset = CHARSET_FROM_ID (charset_ascii);
3003           else
3004             charset = CHARSET_FROM_ID (charset_id_0);
3005           break;
3006
3007         case ISO_0xA0_or_0xFF:
3008           if (charset_id_1 < 0
3009               || ! CHARSET_ISO_CHARS_96 (CHARSET_FROM_ID (charset_id_1))
3010               || CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS)
3011             goto invalid_code;
3012           /* This is a graphic character, we fall down ... */
3013
3014         case ISO_graphic_plane_1:
3015           if (charset_id_1 < 0)
3016             goto invalid_code;
3017           charset = CHARSET_FROM_ID (charset_id_1);
3018           break;
3019
3020         case ISO_control_0:
3021           MAYBE_FINISH_COMPOSITION ();
3022           charset = CHARSET_FROM_ID (charset_ascii);
3023           break;
3024
3025         case ISO_control_1:
3026           MAYBE_FINISH_COMPOSITION ();
3027           goto invalid_code;
3028
3029         case ISO_shift_out:
3030           if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LOCKING_SHIFT)
3031               || CODING_ISO_DESIGNATION (coding, 1) < 0)
3032             goto invalid_code;
3033           CODING_ISO_INVOCATION (coding, 0) = 1;
3034           charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
3035           continue;
3036
3037         case ISO_shift_in:
3038           if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LOCKING_SHIFT))
3039             goto invalid_code;
3040           CODING_ISO_INVOCATION (coding, 0) = 0;
3041           charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
3042           continue;
3043
3044         case ISO_single_shift_2_7:
3045         case ISO_single_shift_2:
3046           if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT))
3047             goto invalid_code;
3048           /* SS2 is handled as an escape sequence of ESC 'N' */
3049           c1 = 'N';
3050           goto label_escape_sequence;
3051
3052         case ISO_single_shift_3:
3053           if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT))
3054             goto invalid_code;
3055           /* SS2 is handled as an escape sequence of ESC 'O' */
3056           c1 = 'O';
3057           goto label_escape_sequence;
3058
3059         case ISO_control_sequence_introducer:
3060           /* CSI is handled as an escape sequence of ESC '[' ...  */
3061           c1 = '[';
3062           goto label_escape_sequence;
3063
3064         case ISO_escape:
3065           ONE_MORE_BYTE (c1);
3066         label_escape_sequence:
3067           /* Escape sequences handled here are invocation,
3068              designation, direction specification, and character
3069              composition specification.  */
3070           switch (c1)
3071             {
3072             case '&':           /* revision of following character set */
3073               ONE_MORE_BYTE (c1);
3074               if (!(c1 >= '@' && c1 <= '~'))
3075                 goto invalid_code;
3076               ONE_MORE_BYTE (c1);
3077               if (c1 != ISO_CODE_ESC)
3078                 goto invalid_code;
3079               ONE_MORE_BYTE (c1);
3080               goto label_escape_sequence;
3081
3082             case '$':           /* designation of 2-byte character set */
3083               if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DESIGNATION))
3084                 goto invalid_code;
3085               {
3086                 int reg, chars96;
3087
3088                 ONE_MORE_BYTE (c1);
3089                 if (c1 >= '@' && c1 <= 'B')
3090                   {     /* designation of JISX0208.1978, GB2312.1980,
3091                            or JISX0208.1980 */
3092                     reg = 0, chars96 = 0;
3093                   }
3094                 else if (c1 >= 0x28 && c1 <= 0x2B)
3095                   { /* designation of DIMENSION2_CHARS94 character set */
3096                     reg = c1 - 0x28, chars96 = 0;
3097                     ONE_MORE_BYTE (c1);
3098                   }
3099                 else if (c1 >= 0x2C && c1 <= 0x2F)
3100                   { /* designation of DIMENSION2_CHARS96 character set */
3101                     reg = c1 - 0x2C, chars96 = 1;
3102                     ONE_MORE_BYTE (c1);
3103                   }
3104                 else
3105                   goto invalid_code;
3106                 DECODE_DESIGNATION (reg, 2, chars96, c1);
3107                 /* We must update these variables now.  */
3108                 if (reg == 0)
3109                   charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
3110                 else if (reg == 1)
3111                   charset_id_1 = CODING_ISO_INVOKED_CHARSET (coding, 1);
3112                 if (chars96 < 0)
3113                   goto invalid_code;
3114               }
3115               continue;
3116
3117             case 'n':           /* invocation of locking-shift-2 */
3118               if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LOCKING_SHIFT)
3119                   || CODING_ISO_DESIGNATION (coding, 2) < 0)
3120                 goto invalid_code;
3121               CODING_ISO_INVOCATION (coding, 0) = 2;
3122               charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
3123               continue;
3124
3125             case 'o':           /* invocation of locking-shift-3 */
3126               if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LOCKING_SHIFT)
3127                   || CODING_ISO_DESIGNATION (coding, 3) < 0)
3128                 goto invalid_code;
3129               CODING_ISO_INVOCATION (coding, 0) = 3;
3130               charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
3131               continue;
3132
3133             case 'N':           /* invocation of single-shift-2 */
3134               if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT)
3135                   || CODING_ISO_DESIGNATION (coding, 2) < 0)
3136                 goto invalid_code;
3137               charset_id_2 = CODING_ISO_DESIGNATION (coding, 2);
3138               if (charset_id_2 < 0)
3139                 charset = CHARSET_FROM_ID (charset_ascii);
3140               else
3141                 charset = CHARSET_FROM_ID (charset_id_2);
3142               ONE_MORE_BYTE (c1);
3143               if (c1 < 0x20 || (c1 >= 0x80 && c1 < 0xA0))
3144                 goto invalid_code;
3145               break;
3146
3147             case 'O':           /* invocation of single-shift-3 */
3148               if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT)
3149                   || CODING_ISO_DESIGNATION (coding, 3) < 0)
3150                 goto invalid_code;
3151               charset_id_3 = CODING_ISO_DESIGNATION (coding, 3);
3152               if (charset_id_3 < 0)
3153                 charset = CHARSET_FROM_ID (charset_ascii);
3154               else
3155                 charset = CHARSET_FROM_ID (charset_id_3);
3156               ONE_MORE_BYTE (c1);
3157               if (c1 < 0x20 || (c1 >= 0x80 && c1 < 0xA0))
3158                 goto invalid_code;
3159               break;
3160
3161             case '0': case '2': case '3': case '4': /* start composition */
3162               if (! (coding->common_flags & CODING_ANNOTATE_COMPOSITION_MASK))
3163                 goto invalid_code;
3164               DECODE_COMPOSITION_START (c1);
3165               continue;
3166
3167             case '1':           /* end composition */
3168               if (composition_state == COMPOSING_NO)
3169                 goto invalid_code;
3170               DECODE_COMPOSITION_END ();
3171               continue;
3172
3173             case '[':           /* specification of direction */
3174               if (! CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DIRECTION)
3175                 goto invalid_code;
3176               /* For the moment, nested direction is not supported.
3177                  So, `coding->mode & CODING_MODE_DIRECTION' zero means
3178                  left-to-right, and nozero means right-to-left.  */
3179               ONE_MORE_BYTE (c1);
3180               switch (c1)
3181                 {
3182                 case ']':       /* end of the current direction */
3183                   coding->mode &= ~CODING_MODE_DIRECTION;
3184
3185                 case '0':       /* end of the current direction */
3186                 case '1':       /* start of left-to-right direction */
3187                   ONE_MORE_BYTE (c1);
3188                   if (c1 == ']')
3189                     coding->mode &= ~CODING_MODE_DIRECTION;
3190                   else
3191                     goto invalid_code;
3192                   break;
3193
3194                 case '2':       /* start of right-to-left direction */
3195                   ONE_MORE_BYTE (c1);
3196                   if (c1 == ']')
3197                     coding->mode |= CODING_MODE_DIRECTION;
3198                   else
3199                     goto invalid_code;
3200                   break;
3201
3202                 default:
3203                   goto invalid_code;
3204                 }
3205               continue;
3206
3207             case '%':
3208               ONE_MORE_BYTE (c1);
3209               if (c1 == '/')
3210                 {
3211                   /* CTEXT extended segment:
3212                      ESC % / [0-4] M L --ENCODING-NAME-- \002 --BYTES--
3213                      We keep these bytes as is for the moment.
3214                      They may be decoded by post-read-conversion.  */
3215                   int dim, M, L;
3216                   int size;
3217
3218                   ONE_MORE_BYTE (dim);
3219                   ONE_MORE_BYTE (M);
3220                   ONE_MORE_BYTE (L);
3221                   size = ((M - 128) * 128) + (L - 128);
3222                   if (charbuf + 8 + size > charbuf_end)
3223                     goto break_loop;
3224                   *charbuf++ = ISO_CODE_ESC;
3225                   *charbuf++ = '%';
3226                   *charbuf++ = '/';
3227                   *charbuf++ = dim;
3228                   *charbuf++ = BYTE8_TO_CHAR (M);
3229                   *charbuf++ = BYTE8_TO_CHAR (L);
3230                   while (size-- > 0)
3231                     {
3232                       ONE_MORE_BYTE (c1);
3233                       *charbuf++ = ASCII_BYTE_P (c1) ? c1 : BYTE8_TO_CHAR (c1);
3234                     }
3235                 }
3236               else if (c1 == 'G')
3237                 {
3238                   /* XFree86 extension for embedding UTF-8 in CTEXT:
3239                      ESC % G --UTF-8-BYTES-- ESC % @
3240                      We keep these bytes as is for the moment.
3241                      They may be decoded by post-read-conversion.  */
3242                   int *p = charbuf;
3243
3244                   if (p + 6 > charbuf_end)
3245                     goto break_loop;
3246                   *p++ = ISO_CODE_ESC;
3247                   *p++ = '%';
3248                   *p++ = 'G';
3249                   while (p < charbuf_end)
3250                     {
3251                       ONE_MORE_BYTE (c1);
3252                       if (c1 == ISO_CODE_ESC
3253                           && src + 1 < src_end
3254                           && src[0] == '%'
3255                           && src[1] == '@')
3256                         {
3257                           src += 2;
3258                           break;
3259                         }
3260                       *p++ = ASCII_BYTE_P (c1) ? c1 : BYTE8_TO_CHAR (c1);
3261                     }
3262                   if (p + 3 > charbuf_end)
3263                     goto break_loop;
3264                   *p++ = ISO_CODE_ESC;
3265                   *p++ = '%';
3266                   *p++ = '@';
3267                   charbuf = p;
3268                 }
3269               else
3270                 goto invalid_code;
3271               continue;
3272               break;
3273
3274             default:
3275               if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DESIGNATION))
3276                 goto invalid_code;
3277               {
3278                 int reg, chars96;
3279
3280                 if (c1 >= 0x28 && c1 <= 0x2B)
3281                   { /* designation of DIMENSION1_CHARS94 character set */
3282                     reg = c1 - 0x28, chars96 = 0;
3283                     ONE_MORE_BYTE (c1);
3284                   }
3285                 else if (c1 >= 0x2C && c1 <= 0x2F)
3286                   { /* designation of DIMENSION1_CHARS96 character set */
3287                     reg = c1 - 0x2C, chars96 = 1;
3288                     ONE_MORE_BYTE (c1);
3289                   }
3290                 else
3291                   goto invalid_code;
3292                 DECODE_DESIGNATION (reg, 1, chars96, c1);
3293                 /* We must update these variables now.  */
3294                 if (reg == 0)
3295                   charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
3296                 else if (reg == 1)
3297                   charset_id_1 = CODING_ISO_INVOKED_CHARSET (coding, 1);
3298                 if (chars96 < 0)
3299                   goto invalid_code;
3300               }
3301               continue;
3302             }
3303         }
3304
3305       if (charset->id != charset_ascii
3306           && last_id != charset->id)
3307         {
3308           if (last_id != charset_ascii)
3309             ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
3310           last_id = charset->id;
3311           last_offset = char_offset;
3312         }
3313
3314       /* Now we know CHARSET and 1st position code C1 of a character.
3315          Produce a decoded character while getting 2nd position code
3316          C2 if necessary.  */
3317       c1 &= 0x7F;
3318       if (CHARSET_DIMENSION (charset) > 1)
3319         {
3320           ONE_MORE_BYTE (c2);
3321           if (c2 < 0x20 || (c2 >= 0x80 && c2 < 0xA0))
3322             /* C2 is not in a valid range.  */
3323             goto invalid_code;
3324           c1 = (c1 << 8) | (c2 & 0x7F);
3325           if (CHARSET_DIMENSION (charset) > 2)
3326             {
3327               ONE_MORE_BYTE (c2);
3328               if (c2 < 0x20 || (c2 >= 0x80 && c2 < 0xA0))
3329                 /* C2 is not in a valid range.  */
3330                 goto invalid_code;
3331               c1 = (c1 << 8) | (c2 & 0x7F);
3332             }
3333         }
3334
3335       CODING_DECODE_CHAR (coding, src, src_base, src_end, charset, c1, c);
3336       if (c < 0)
3337         {
3338           MAYBE_FINISH_COMPOSITION ();
3339           for (; src_base < src; src_base++, char_offset++)
3340             {
3341               if (ASCII_BYTE_P (*src_base))
3342                 *charbuf++ = *src_base;
3343               else
3344                 *charbuf++ = BYTE8_TO_CHAR (*src_base);
3345             }
3346         }
3347       else if (composition_state == COMPOSING_NO)
3348         {
3349           *charbuf++ = c;
3350           char_offset++;
3351         }
3352       else
3353         {
3354           components[component_idx++] = c;
3355           if (method == COMPOSITION_WITH_RULE
3356               || (method == COMPOSITION_WITH_RULE_ALTCHARS
3357                   && composition_state == COMPOSING_COMPONENT_CHAR))
3358             composition_state++;
3359         }
3360       continue;
3361
3362     invalid_code:
3363       MAYBE_FINISH_COMPOSITION ();
3364       src = src_base;
3365       consumed_chars = consumed_chars_base;
3366       ONE_MORE_BYTE (c);
3367       *charbuf++ = c < 0 ? -c : ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
3368       char_offset++;
3369       coding->errors++;
3370       continue;
3371
3372     break_loop:
3373       break;
3374     }
3375
3376  no_more_source:
3377   if (last_id != charset_ascii)
3378     ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
3379   coding->consumed_char += consumed_chars_base;
3380   coding->consumed = src_base - coding->source;
3381   coding->charbuf_used = charbuf - coding->charbuf;
3382 }
3383
3384
3385 /* ISO2022 encoding stuff.  */
3386
3387 /*
3388    It is not enough to say just "ISO2022" on encoding, we have to
3389    specify more details.  In Emacs, each coding system of ISO2022
3390    variant has the following specifications:
3391         1. Initial designation to G0 thru G3.
3392         2. Allows short-form designation?
3393         3. ASCII should be designated to G0 before control characters?
3394         4. ASCII should be designated to G0 at end of line?
3395         5. 7-bit environment or 8-bit environment?
3396         6. Use locking-shift?
3397         7. Use Single-shift?
3398    And the following two are only for Japanese:
3399         8. Use ASCII in place of JIS0201-1976-Roman?
3400         9. Use JISX0208-1983 in place of JISX0208-1978?
3401    These specifications are encoded in CODING_ISO_FLAGS (coding) as flag bits
3402    defined by macros CODING_ISO_FLAG_XXX.  See `coding.h' for more
3403    details.
3404 */
3405
3406 /* Produce codes (escape sequence) for designating CHARSET to graphic
3407    register REG at DST, and increment DST.  If <final-char> of CHARSET is
3408    '@', 'A', or 'B' and the coding system CODING allows, produce
3409    designation sequence of short-form.  */
3410
3411 #define ENCODE_DESIGNATION(charset, reg, coding)                        \
3412   do {                                                                  \
3413     unsigned char final_char = CHARSET_ISO_FINAL (charset);             \
3414     char *intermediate_char_94 = "()*+";                                \
3415     char *intermediate_char_96 = ",-./";                                \
3416     int revision = -1;                                                  \
3417     int c;                                                              \
3418                                                                         \
3419     if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_REVISION)           \
3420       revision = CHARSET_ISO_REVISION (charset);                        \
3421                                                                         \
3422     if (revision >= 0)                                                  \
3423       {                                                                 \
3424         EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, '&');                       \
3425         EMIT_ONE_BYTE ('@' + revision);                                 \
3426       }                                                                 \
3427     EMIT_ONE_ASCII_BYTE (ISO_CODE_ESC);                                 \
3428     if (CHARSET_DIMENSION (charset) == 1)                               \
3429       {                                                                 \
3430         if (! CHARSET_ISO_CHARS_96 (charset))                           \
3431           c = intermediate_char_94[reg];                                \
3432         else                                                            \
3433           c = intermediate_char_96[reg];                                \
3434         EMIT_ONE_ASCII_BYTE (c);                                        \
3435       }                                                                 \
3436     else                                                                \
3437       {                                                                 \
3438         EMIT_ONE_ASCII_BYTE ('$');                                      \
3439         if (! CHARSET_ISO_CHARS_96 (charset))                           \
3440           {                                                             \
3441             if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LONG_FORM   \
3442                 || reg != 0                                             \
3443                 || final_char < '@' || final_char > 'B')                \
3444               EMIT_ONE_ASCII_BYTE (intermediate_char_94[reg]);          \
3445           }                                                             \
3446         else                                                            \
3447           EMIT_ONE_ASCII_BYTE (intermediate_char_96[reg]);              \
3448       }                                                                 \
3449     EMIT_ONE_ASCII_BYTE (final_char);                                   \
3450                                                                         \
3451     CODING_ISO_DESIGNATION (coding, reg) = CHARSET_ID (charset);        \
3452   } while (0)
3453
3454
3455 /* The following two macros produce codes (control character or escape
3456    sequence) for ISO2022 single-shift functions (single-shift-2 and
3457    single-shift-3).  */
3458
3459 #define ENCODE_SINGLE_SHIFT_2                                           \
3460   do {                                                                  \
3461     if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS)         \
3462       EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'N');                         \
3463     else                                                                \
3464       EMIT_ONE_BYTE (ISO_CODE_SS2);                                     \
3465     CODING_ISO_SINGLE_SHIFTING (coding) = 1;                            \
3466   } while (0)
3467
3468
3469 #define ENCODE_SINGLE_SHIFT_3                                           \
3470   do {                                                                  \
3471     if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS)         \
3472       EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'O');                         \
3473     else                                                                \
3474       EMIT_ONE_BYTE (ISO_CODE_SS3);                                     \
3475     CODING_ISO_SINGLE_SHIFTING (coding) = 1;                            \
3476   } while (0)
3477
3478
3479 /* The following four macros produce codes (control character or
3480    escape sequence) for ISO2022 locking-shift functions (shift-in,
3481    shift-out, locking-shift-2, and locking-shift-3).  */
3482
3483 #define ENCODE_SHIFT_IN                                 \
3484   do {                                                  \
3485     EMIT_ONE_ASCII_BYTE (ISO_CODE_SI);                  \
3486     CODING_ISO_INVOCATION (coding, 0) = 0;              \
3487   } while (0)
3488
3489
3490 #define ENCODE_SHIFT_OUT                                \
3491   do {                                                  \
3492     EMIT_ONE_ASCII_BYTE (ISO_CODE_SO);                  \
3493     CODING_ISO_INVOCATION (coding, 0) = 1;              \
3494   } while (0)
3495
3496
3497 #define ENCODE_LOCKING_SHIFT_2                          \
3498   do {                                                  \
3499     EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'n');           \
3500     CODING_ISO_INVOCATION (coding, 0) = 2;              \
3501   } while (0)
3502
3503
3504 #define ENCODE_LOCKING_SHIFT_3                          \
3505   do {                                                  \
3506     EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'n');           \
3507     CODING_ISO_INVOCATION (coding, 0) = 3;              \
3508   } while (0)
3509
3510
3511 /* Produce codes for a DIMENSION1 character whose character set is
3512    CHARSET and whose position-code is C1.  Designation and invocation
3513    sequences are also produced in advance if necessary.  */
3514
3515 #define ENCODE_ISO_CHARACTER_DIMENSION1(charset, c1)                    \
3516   do {                                                                  \
3517     int id = CHARSET_ID (charset);                                      \
3518                                                                         \
3519     if ((CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_USE_ROMAN)         \
3520         && id == charset_ascii)                                         \
3521       {                                                                 \
3522         id = charset_jisx0201_roman;                                    \
3523         charset = CHARSET_FROM_ID (id);                                 \
3524       }                                                                 \
3525                                                                         \
3526     if (CODING_ISO_SINGLE_SHIFTING (coding))                            \
3527       {                                                                 \
3528         if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS)     \
3529           EMIT_ONE_ASCII_BYTE (c1 & 0x7F);                              \
3530         else                                                            \
3531           EMIT_ONE_BYTE (c1 | 0x80);                                    \
3532         CODING_ISO_SINGLE_SHIFTING (coding) = 0;                        \
3533         break;                                                          \
3534       }                                                                 \
3535     else if (id == CODING_ISO_INVOKED_CHARSET (coding, 0))              \
3536       {                                                                 \
3537         EMIT_ONE_ASCII_BYTE (c1 & 0x7F);                                \
3538         break;                                                          \
3539       }                                                                 \
3540     else if (id == CODING_ISO_INVOKED_CHARSET (coding, 1))              \
3541       {                                                                 \
3542         EMIT_ONE_BYTE (c1 | 0x80);                                      \
3543         break;                                                          \
3544       }                                                                 \
3545     else                                                                \
3546       /* Since CHARSET is not yet invoked to any graphic planes, we     \
3547          must invoke it, or, at first, designate it to some graphic     \
3548          register.  Then repeat the loop to actually produce the        \
3549          character.  */                                                 \
3550       dst = encode_invocation_designation (charset, coding, dst,        \
3551                                            &produced_chars);            \
3552   } while (1)
3553
3554
3555 /* Produce codes for a DIMENSION2 character whose character set is
3556    CHARSET and whose position-codes are C1 and C2.  Designation and
3557    invocation codes are also produced in advance if necessary.  */
3558
3559 #define ENCODE_ISO_CHARACTER_DIMENSION2(charset, c1, c2)                \
3560   do {                                                                  \
3561     int id = CHARSET_ID (charset);                                      \
3562                                                                         \
3563     if ((CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_USE_OLDJIS)        \
3564         && id == charset_jisx0208)                                      \
3565       {                                                                 \
3566         id = charset_jisx0208_1978;                                     \
3567         charset = CHARSET_FROM_ID (id);                                 \
3568       }                                                                 \
3569                                                                         \
3570     if (CODING_ISO_SINGLE_SHIFTING (coding))                            \
3571       {                                                                 \
3572         if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS)     \
3573           EMIT_TWO_ASCII_BYTES ((c1) & 0x7F, (c2) & 0x7F);              \
3574         else                                                            \
3575           EMIT_TWO_BYTES ((c1) | 0x80, (c2) | 0x80);                    \
3576         CODING_ISO_SINGLE_SHIFTING (coding) = 0;                        \
3577         break;                                                          \
3578       }                                                                 \
3579     else if (id == CODING_ISO_INVOKED_CHARSET (coding, 0))              \
3580       {                                                                 \
3581         EMIT_TWO_ASCII_BYTES ((c1) & 0x7F, (c2) & 0x7F);                \
3582         break;                                                          \
3583       }                                                                 \
3584     else if (id == CODING_ISO_INVOKED_CHARSET (coding, 1))              \
3585       {                                                                 \
3586         EMIT_TWO_BYTES ((c1) | 0x80, (c2) | 0x80);                      \
3587         break;                                                          \
3588       }                                                                 \
3589     else                                                                \
3590       /* Since CHARSET is not yet invoked to any graphic planes, we     \
3591          must invoke it, or, at first, designate it to some graphic     \
3592          register.  Then repeat the loop to actually produce the        \
3593          character.  */                                                 \
3594       dst = encode_invocation_designation (charset, coding, dst,        \
3595                                            &produced_chars);            \
3596   } while (1)
3597
3598
3599 #define ENCODE_ISO_CHARACTER(charset, c)                                   \
3600   do {                                                                     \
3601     int code = ENCODE_CHAR ((charset),(c));                                \
3602                                                                            \
3603     if (CHARSET_DIMENSION (charset) == 1)                                  \
3604       ENCODE_ISO_CHARACTER_DIMENSION1 ((charset), code);                   \
3605     else                                                                   \
3606       ENCODE_ISO_CHARACTER_DIMENSION2 ((charset), code >> 8, code & 0xFF); \
3607   } while (0)
3608
3609
3610 /* Produce designation and invocation codes at a place pointed by DST
3611    to use CHARSET.  The element `spec.iso_2022' of *CODING is updated.
3612    Return new DST.  */
3613
3614 unsigned char *
3615 encode_invocation_designation (charset, coding, dst, p_nchars)
3616      struct charset *charset;
3617      struct coding_system *coding;
3618      unsigned char *dst;
3619      int *p_nchars;
3620 {
3621   int multibytep = coding->dst_multibyte;
3622   int produced_chars = *p_nchars;
3623   int reg;                      /* graphic register number */
3624   int id = CHARSET_ID (charset);
3625
3626   /* At first, check designations.  */
3627   for (reg = 0; reg < 4; reg++)
3628     if (id == CODING_ISO_DESIGNATION (coding, reg))
3629       break;
3630
3631   if (reg >= 4)
3632     {
3633       /* CHARSET is not yet designated to any graphic registers.  */
3634       /* At first check the requested designation.  */
3635       reg = CODING_ISO_REQUEST (coding, id);
3636       if (reg < 0)
3637         /* Since CHARSET requests no special designation, designate it
3638            to graphic register 0.  */
3639         reg = 0;
3640
3641       ENCODE_DESIGNATION (charset, reg, coding);
3642     }
3643
3644   if (CODING_ISO_INVOCATION (coding, 0) != reg
3645       && CODING_ISO_INVOCATION (coding, 1) != reg)
3646     {
3647       /* Since the graphic register REG is not invoked to any graphic
3648          planes, invoke it to graphic plane 0.  */
3649       switch (reg)
3650         {
3651         case 0:                 /* graphic register 0 */
3652           ENCODE_SHIFT_IN;
3653           break;
3654
3655         case 1:                 /* graphic register 1 */
3656           ENCODE_SHIFT_OUT;
3657           break;
3658
3659         case 2:                 /* graphic register 2 */
3660           if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT)
3661             ENCODE_SINGLE_SHIFT_2;
3662           else
3663             ENCODE_LOCKING_SHIFT_2;
3664           break;
3665
3666         case 3:                 /* graphic register 3 */
3667           if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT)
3668             ENCODE_SINGLE_SHIFT_3;
3669           else
3670             ENCODE_LOCKING_SHIFT_3;
3671           break;
3672         }
3673     }
3674
3675   *p_nchars = produced_chars;
3676   return dst;
3677 }
3678
3679 /* The following three macros produce codes for indicating direction
3680    of text.  */
3681 #define ENCODE_CONTROL_SEQUENCE_INTRODUCER                              \
3682   do {                                                                  \
3683     if (CODING_ISO_FLAGS (coding) == CODING_ISO_FLAG_SEVEN_BITS)        \
3684       EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, '[');                         \
3685     else                                                                \
3686       EMIT_ONE_BYTE (ISO_CODE_CSI);                                     \
3687   } while (0)
3688
3689
3690 #define ENCODE_DIRECTION_R2L()                  \
3691   do {                                          \
3692     ENCODE_CONTROL_SEQUENCE_INTRODUCER (dst);   \
3693     EMIT_TWO_ASCII_BYTES ('2', ']');            \
3694   } while (0)
3695
3696
3697 #define ENCODE_DIRECTION_L2R()                  \
3698   do {                                          \
3699     ENCODE_CONTROL_SEQUENCE_INTRODUCER (dst);   \
3700     EMIT_TWO_ASCII_BYTES ('0', ']');            \
3701   } while (0)
3702
3703
3704 /* Produce codes for designation and invocation to reset the graphic
3705    planes and registers to initial state.  */
3706 #define ENCODE_RESET_PLANE_AND_REGISTER()                               \
3707   do {                                                                  \
3708     int reg;                                                            \
3709     struct charset *charset;                                            \
3710                                                                         \
3711     if (CODING_ISO_INVOCATION (coding, 0) != 0)                         \
3712       ENCODE_SHIFT_IN;                                                  \
3713     for (reg = 0; reg < 4; reg++)                                       \
3714       if (CODING_ISO_INITIAL (coding, reg) >= 0                         \
3715           && (CODING_ISO_DESIGNATION (coding, reg)                      \
3716               != CODING_ISO_INITIAL (coding, reg)))                     \
3717         {                                                               \
3718           charset = CHARSET_FROM_ID (CODING_ISO_INITIAL (coding, reg)); \
3719           ENCODE_DESIGNATION (charset, reg, coding);                    \
3720         }                                                               \
3721   } while (0)
3722
3723
3724 /* Produce designation sequences of charsets in the line started from
3725    SRC to a place pointed by DST, and return updated DST.
3726
3727    If the current block ends before any end-of-line, we may fail to
3728    find all the necessary designations.  */
3729
3730 static unsigned char *
3731 encode_designation_at_bol (coding, charbuf, charbuf_end, dst)
3732      struct coding_system *coding;
3733      int *charbuf, *charbuf_end;
3734      unsigned char *dst;
3735 {
3736   struct charset *charset;
3737   /* Table of charsets to be designated to each graphic register.  */
3738   int r[4];
3739   int c, found = 0, reg;
3740   int produced_chars = 0;
3741   int multibytep = coding->dst_multibyte;
3742   Lisp_Object attrs;
3743   Lisp_Object charset_list;
3744
3745   attrs = CODING_ID_ATTRS (coding->id);
3746   charset_list = CODING_ATTR_CHARSET_LIST (attrs);
3747   if (EQ (charset_list, Qiso_2022))
3748     charset_list = Viso_2022_charset_list;
3749
3750   for (reg = 0; reg < 4; reg++)
3751     r[reg] = -1;
3752
3753   while (found < 4)
3754     {
3755       int id;
3756
3757       c = *charbuf++;
3758       if (c == '\n')
3759         break;
3760       charset = char_charset (c, charset_list, NULL);
3761       id = CHARSET_ID (charset);
3762       reg = CODING_ISO_REQUEST (coding, id);
3763       if (reg >= 0 && r[reg] < 0)
3764         {
3765           found++;
3766           r[reg] = id;
3767         }
3768     }
3769
3770   if (found)
3771     {
3772       for (reg = 0; reg < 4; reg++)
3773         if (r[reg] >= 0
3774             && CODING_ISO_DESIGNATION (coding, reg) != r[reg])
3775           ENCODE_DESIGNATION (CHARSET_FROM_ID (r[reg]), reg, coding);
3776     }
3777
3778   return dst;
3779 }
3780
3781 /* See the above "GENERAL NOTES on `encode_coding_XXX ()' functions".  */
3782
3783 static int
3784 encode_coding_iso_2022 (coding)
3785      struct coding_system *coding;
3786 {
3787   int multibytep = coding->dst_multibyte;
3788   int *charbuf = coding->charbuf;
3789   int *charbuf_end = charbuf + coding->charbuf_used;
3790   unsigned char *dst = coding->destination + coding->produced;
3791   unsigned char *dst_end = coding->destination + coding->dst_bytes;
3792   int safe_room = 16;
3793   int bol_designation
3794     = (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DESIGNATE_AT_BOL
3795        && CODING_ISO_BOL (coding));
3796   int produced_chars = 0;
3797   Lisp_Object attrs, eol_type, charset_list;
3798   int ascii_compatible;
3799   int c;
3800   int preferred_charset_id = -1;
3801
3802   CODING_GET_INFO (coding, attrs, charset_list);
3803   eol_type = CODING_ID_EOL_TYPE (coding->id);
3804   if (VECTORP (eol_type))
3805     eol_type = Qunix;
3806
3807   setup_iso_safe_charsets (attrs);
3808   /* Charset list may have been changed.  */
3809   charset_list = CODING_ATTR_CHARSET_LIST (attrs);              \
3810   coding->safe_charsets = (char *) SDATA (CODING_ATTR_SAFE_CHARSETS(attrs));
3811
3812   ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
3813
3814   while (charbuf < charbuf_end)
3815     {
3816       ASSURE_DESTINATION (safe_room);
3817
3818       if (bol_designation)
3819         {
3820           unsigned char *dst_prev = dst;
3821
3822           /* We have to produce designation sequences if any now.  */
3823           dst = encode_designation_at_bol (coding, charbuf, charbuf_end, dst);
3824           bol_designation = 0;
3825           /* We are sure that designation sequences are all ASCII bytes.  */
3826           produced_chars += dst - dst_prev;
3827         }
3828
3829       c = *charbuf++;
3830
3831       if (c < 0)
3832         {
3833           /* Handle an annotation.  */
3834           switch (*charbuf)
3835             {
3836             case CODING_ANNOTATE_COMPOSITION_MASK:
3837               /* Not yet implemented.  */
3838               break;
3839             case CODING_ANNOTATE_CHARSET_MASK:
3840               preferred_charset_id = charbuf[2];
3841               if (preferred_charset_id >= 0
3842                   && NILP (Fmemq (make_number (preferred_charset_id),
3843                                   charset_list)))
3844                 preferred_charset_id = -1;
3845               break;
3846             default:
3847               abort ();
3848             }
3849           charbuf += -c - 1;
3850           continue;
3851         }
3852
3853       /* Now encode the character C.  */
3854       if (c < 0x20 || c == 0x7F)
3855         {
3856           if (c == '\n'
3857               || (c == '\r' && EQ (eol_type, Qmac)))
3858             {
3859               if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_RESET_AT_EOL)
3860                 ENCODE_RESET_PLANE_AND_REGISTER ();
3861               if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_INIT_AT_BOL)
3862                 {
3863                   int i;
3864
3865                   for (i = 0; i < 4; i++)
3866                     CODING_ISO_DESIGNATION (coding, i)
3867                       = CODING_ISO_INITIAL (coding, i);
3868                 }
3869               bol_designation
3870                 = CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DESIGNATE_AT_BOL;
3871             }
3872           else if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_RESET_AT_CNTL)
3873             ENCODE_RESET_PLANE_AND_REGISTER ();
3874           EMIT_ONE_ASCII_BYTE (c);
3875         }
3876       else if (ASCII_CHAR_P (c))
3877         {
3878           if (ascii_compatible)
3879             EMIT_ONE_ASCII_BYTE (c);
3880           else
3881             {
3882               struct charset *charset = CHARSET_FROM_ID (charset_ascii);
3883               ENCODE_ISO_CHARACTER (charset, c);
3884             }
3885         }
3886       else if (CHAR_BYTE8_P (c))
3887         {
3888           c = CHAR_TO_BYTE8 (c);
3889           EMIT_ONE_BYTE (c);
3890         }
3891       else
3892         {
3893           struct charset *charset;
3894
3895           if (preferred_charset_id >= 0)
3896             {
3897               charset = CHARSET_FROM_ID (preferred_charset_id);
3898               if (! CHAR_CHARSET_P (c, charset))
3899                 charset = char_charset (c, charset_list, NULL);
3900             }
3901           else
3902             charset = char_charset (c, charset_list, NULL);
3903           if (!charset)
3904             {
3905               if (coding->mode & CODING_MODE_SAFE_ENCODING)
3906                 {
3907                   c = CODING_INHIBIT_CHARACTER_SUBSTITUTION;
3908                   charset = CHARSET_FROM_ID (charset_ascii);
3909                 }
3910               else
3911                 {
3912                   c = coding->default_char;
3913                   charset = char_charset (c, charset_list, NULL);
3914                 }
3915             }
3916           ENCODE_ISO_CHARACTER (charset, c);
3917         }
3918     }
3919
3920   if (coding->mode & CODING_MODE_LAST_BLOCK
3921       && CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_RESET_AT_EOL)
3922     {
3923       ASSURE_DESTINATION (safe_room);
3924       ENCODE_RESET_PLANE_AND_REGISTER ();
3925     }
3926   record_conversion_result (coding, CODING_RESULT_SUCCESS);
3927   CODING_ISO_BOL (coding) = bol_designation;
3928   coding->produced_char += produced_chars;
3929   coding->produced = dst - coding->destination;
3930   return 0;
3931 }
3932
3933 \f
3934 /*** 8,9. SJIS and BIG5 handlers ***/
3935
3936 /* Although SJIS and BIG5 are not ISO's coding system, they are used
3937    quite widely.  So, for the moment, Emacs supports them in the bare
3938    C code.  But, in the future, they may be supported only by CCL.  */
3939
3940 /* SJIS is a coding system encoding three character sets: ASCII, right
3941    half of JISX0201-Kana, and JISX0208.  An ASCII character is encoded
3942    as is.  A character of charset katakana-jisx0201 is encoded by
3943    "position-code + 0x80".  A character of charset japanese-jisx0208
3944    is encoded in 2-byte but two position-codes are divided and shifted
3945    so that it fit in the range below.
3946
3947    --- CODE RANGE of SJIS ---
3948    (character set)      (range)
3949    ASCII                0x00 .. 0x7F
3950    KATAKANA-JISX0201    0xA0 .. 0xDF
3951    JISX0208 (1st byte)  0x81 .. 0x9F and 0xE0 .. 0xEF
3952             (2nd byte)  0x40 .. 0x7E and 0x80 .. 0xFC
3953    -------------------------------
3954
3955 */
3956
3957 /* BIG5 is a coding system encoding two character sets: ASCII and
3958    Big5.  An ASCII character is encoded as is.  Big5 is a two-byte
3959    character set and is encoded in two-byte.
3960
3961    --- CODE RANGE of BIG5 ---
3962    (character set)      (range)
3963    ASCII                0x00 .. 0x7F
3964    Big5 (1st byte)      0xA1 .. 0xFE
3965         (2nd byte)      0x40 .. 0x7E and 0xA1 .. 0xFE
3966    --------------------------
3967
3968   */
3969
3970 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
3971    Check if a text is encoded in SJIS.  If it is, return
3972    CATEGORY_MASK_SJIS, else return 0.  */
3973
3974 static int
3975 detect_coding_sjis (coding, detect_info)
3976      struct coding_system *coding;
3977      struct coding_detection_info *detect_info;
3978 {
3979   const unsigned char *src = coding->source, *src_base;
3980   const unsigned char *src_end = coding->source + coding->src_bytes;
3981   int multibytep = coding->src_multibyte;
3982   int consumed_chars = 0;
3983   int found = 0;
3984   int c;
3985
3986   detect_info->checked |= CATEGORY_MASK_SJIS;
3987   /* A coding system of this category is always ASCII compatible.  */
3988   src += coding->head_ascii;
3989
3990   while (1)
3991     {
3992       src_base = src;
3993       ONE_MORE_BYTE (c);
3994       if (c < 0x80)
3995         continue;
3996       if ((c >= 0x81 && c <= 0x9F) || (c >= 0xE0 && c <= 0xEF))
3997         {
3998           ONE_MORE_BYTE (c);
3999           if (c < 0x40 || c == 0x7F || c > 0xFC)
4000             break;
4001           found = CATEGORY_MASK_SJIS;
4002         }
4003       else if (c >= 0xA0 && c < 0xE0)
4004         found = CATEGORY_MASK_SJIS;
4005       else
4006         break;
4007     }
4008   detect_info->rejected |= CATEGORY_MASK_SJIS;
4009   return 0;
4010
4011  no_more_source:
4012   if (src_base < src && coding->mode & CODING_MODE_LAST_BLOCK)
4013     {
4014       detect_info->rejected |= CATEGORY_MASK_SJIS;
4015       return 0;
4016     }
4017   detect_info->found |= found;
4018   return 1;
4019 }
4020
4021 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
4022    Check if a text is encoded in BIG5.  If it is, return
4023    CATEGORY_MASK_BIG5, else return 0.  */
4024
4025 static int
4026 detect_coding_big5 (coding, detect_info)
4027      struct coding_system *coding;
4028      struct coding_detection_info *detect_info;
4029 {
4030   const unsigned char *src = coding->source, *src_base;
4031   const unsigned char *src_end = coding->source + coding->src_bytes;
4032   int multibytep = coding->src_multibyte;
4033   int consumed_chars = 0;
4034   int found = 0;
4035   int c;
4036
4037   detect_info->checked |= CATEGORY_MASK_BIG5;
4038   /* A coding system of this category is always ASCII compatible.  */
4039   src += coding->head_ascii;
4040
4041   while (1)
4042     {
4043       src_base = src;
4044       ONE_MORE_BYTE (c);
4045       if (c < 0x80)
4046         continue;
4047       if (c >= 0xA1)
4048         {
4049           ONE_MORE_BYTE (c);
4050           if (c < 0x40 || (c >= 0x7F && c <= 0xA0))
4051             return 0;
4052           found = CATEGORY_MASK_BIG5;
4053         }
4054       else
4055         break;
4056     }
4057   detect_info->rejected |= CATEGORY_MASK_BIG5;
4058   return 0;
4059
4060  no_more_source:
4061   if (src_base < src && coding->mode & CODING_MODE_LAST_BLOCK)
4062     {
4063       detect_info->rejected |= CATEGORY_MASK_BIG5;
4064       return 0;
4065     }
4066   detect_info->found |= found;
4067   return 1;
4068 }
4069
4070 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".
4071    If SJIS_P is 1, decode SJIS text, else decode BIG5 test.  */
4072
4073 static void
4074 decode_coding_sjis (coding)
4075      struct coding_system *coding;
4076 {
4077   const unsigned char *src = coding->source + coding->consumed;
4078   const unsigned char *src_end = coding->source + coding->src_bytes;
4079   const unsigned char *src_base;
4080   int *charbuf = coding->charbuf + coding->charbuf_used;
4081   int *charbuf_end
4082     = coding->charbuf + coding->charbuf_size - MAX_ANNOTATION_LENGTH;
4083   int consumed_chars = 0, consumed_chars_base;
4084   int multibytep = coding->src_multibyte;
4085   struct charset *charset_roman, *charset_kanji, *charset_kana;
4086   struct charset *charset_kanji2;
4087   Lisp_Object attrs, charset_list, val;
4088   int char_offset = coding->produced_char;
4089   int last_offset = char_offset;
4090   int last_id = charset_ascii;
4091
4092   CODING_GET_INFO (coding, attrs, charset_list);
4093
4094   val = charset_list;
4095   charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
4096   charset_kana = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
4097   charset_kanji = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
4098   charset_kanji2 = NILP (val) ? NULL : CHARSET_FROM_ID (XINT (XCAR (val)));
4099
4100   while (1)
4101     {
4102       int c, c1;
4103       struct charset *charset;
4104
4105       src_base = src;
4106       consumed_chars_base = consumed_chars;
4107
4108       if (charbuf >= charbuf_end)
4109         break;
4110
4111       ONE_MORE_BYTE (c);
4112       if (c < 0)
4113         goto invalid_code;
4114       if (c < 0x80)
4115         charset = charset_roman;
4116       else if (c == 0x80 || c == 0xA0)
4117         goto invalid_code;
4118       else if (c >= 0xA1 && c <= 0xDF)
4119         {
4120           /* SJIS -> JISX0201-Kana */
4121           c &= 0x7F;
4122           charset = charset_kana;
4123         }
4124       else if (c <= 0xEF)
4125         {
4126           /* SJIS -> JISX0208 */
4127           ONE_MORE_BYTE (c1);
4128           if (c1 < 0x40 || c1 == 0x7F || c1 > 0xFC)
4129             goto invalid_code;
4130           c = (c << 8) | c1;
4131           SJIS_TO_JIS (c);
4132           charset = charset_kanji;
4133         }
4134       else if (c <= 0xFC && charset_kanji2)
4135         {
4136           /* SJIS -> JISX0213-2 */
4137           ONE_MORE_BYTE (c1);
4138           if (c1 < 0x40 || c1 == 0x7F || c1 > 0xFC)
4139             goto invalid_code;
4140           c = (c << 8) | c1;
4141           SJIS_TO_JIS2 (c);
4142           charset = charset_kanji2;
4143         }
4144       else
4145         goto invalid_code;
4146       if (charset->id != charset_ascii
4147           && last_id != charset->id)
4148         {
4149           if (last_id != charset_ascii)
4150             ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
4151           last_id = charset->id;
4152           last_offset = char_offset;
4153         }
4154       CODING_DECODE_CHAR (coding, src, src_base, src_end, charset, c, c);
4155       *charbuf++ = c;
4156       char_offset++;
4157       continue;
4158
4159     invalid_code:
4160       src = src_base;
4161       consumed_chars = consumed_chars_base;
4162       ONE_MORE_BYTE (c);
4163       *charbuf++ = c < 0 ? -c : BYTE8_TO_CHAR (c);
4164       char_offset++;
4165       coding->errors++;
4166     }
4167
4168  no_more_source:
4169   if (last_id != charset_ascii)
4170     ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
4171   coding->consumed_char += consumed_chars_base;
4172   coding->consumed = src_base - coding->source;
4173   coding->charbuf_used = charbuf - coding->charbuf;
4174 }
4175
4176 static void
4177 decode_coding_big5 (coding)
4178      struct coding_system *coding;
4179 {
4180   const unsigned char *src = coding->source + coding->consumed;
4181   const unsigned char *src_end = coding->source + coding->src_bytes;
4182   const unsigned char *src_base;
4183   int *charbuf = coding->charbuf + coding->charbuf_used;
4184   int *charbuf_end
4185     = coding->charbuf + coding->charbuf_size - MAX_ANNOTATION_LENGTH;
4186   int consumed_chars = 0, consumed_chars_base;
4187   int multibytep = coding->src_multibyte;
4188   struct charset *charset_roman, *charset_big5;
4189   Lisp_Object attrs, charset_list, val;
4190   int char_offset = coding->produced_char;
4191   int last_offset = char_offset;
4192   int last_id = charset_ascii;
4193
4194   CODING_GET_INFO (coding, attrs, charset_list);
4195   val = charset_list;
4196   charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
4197   charset_big5 = CHARSET_FROM_ID (XINT (XCAR (val)));
4198
4199   while (1)
4200     {
4201       int c, c1;
4202       struct charset *charset;
4203
4204       src_base = src;
4205       consumed_chars_base = consumed_chars;
4206
4207       if (charbuf >= charbuf_end)
4208         break;
4209
4210       ONE_MORE_BYTE (c);
4211
4212       if (c < 0)
4213         goto invalid_code;
4214       if (c < 0x80)
4215         charset = charset_roman;
4216       else
4217         {
4218           /* BIG5 -> Big5 */
4219           if (c < 0xA1 || c > 0xFE)
4220             goto invalid_code;
4221           ONE_MORE_BYTE (c1);
4222           if (c1 < 0x40 || (c1 > 0x7E && c1 < 0xA1) || c1 > 0xFE)
4223             goto invalid_code;
4224           c = c << 8 | c1;
4225           charset = charset_big5;
4226         }
4227       if (charset->id != charset_ascii
4228           && last_id != charset->id)
4229         {
4230           if (last_id != charset_ascii)
4231             ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
4232           last_id = charset->id;
4233           last_offset = char_offset;
4234         }
4235       CODING_DECODE_CHAR (coding, src, src_base, src_end, charset, c, c);
4236       *charbuf++ = c;
4237       char_offset++;
4238       continue;
4239
4240     invalid_code:
4241       src = src_base;
4242       consumed_chars = consumed_chars_base;
4243       ONE_MORE_BYTE (c);
4244       *charbuf++ = c < 0 ? -c : BYTE8_TO_CHAR (c);
4245       char_offset++;
4246       coding->errors++;
4247     }
4248
4249  no_more_source:
4250   if (last_id != charset_ascii)
4251     ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
4252   coding->consumed_char += consumed_chars_base;
4253   coding->consumed = src_base - coding->source;
4254   coding->charbuf_used = charbuf - coding->charbuf;
4255 }
4256
4257 /* See the above "GENERAL NOTES on `encode_coding_XXX ()' functions".
4258    This function can encode charsets `ascii', `katakana-jisx0201',
4259    `japanese-jisx0208', `chinese-big5-1', and `chinese-big5-2'.  We
4260    are sure that all these charsets are registered as official charset
4261    (i.e. do not have extended leading-codes).  Characters of other
4262    charsets are produced without any encoding.  If SJIS_P is 1, encode
4263    SJIS text, else encode BIG5 text.  */
4264
4265 static int
4266 encode_coding_sjis (coding)
4267      struct coding_system *coding;
4268 {
4269   int multibytep = coding->dst_multibyte;
4270   int *charbuf = coding->charbuf;
4271   int *charbuf_end = charbuf + coding->charbuf_used;
4272   unsigned char *dst = coding->destination + coding->produced;
4273   unsigned char *dst_end = coding->destination + coding->dst_bytes;
4274   int safe_room = 4;
4275   int produced_chars = 0;
4276   Lisp_Object attrs, charset_list, val;
4277   int ascii_compatible;
4278   struct charset *charset_roman, *charset_kanji, *charset_kana;
4279   struct charset *charset_kanji2;
4280   int c;
4281
4282   CODING_GET_INFO (coding, attrs, charset_list);
4283   val = charset_list;
4284   charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
4285   charset_kana = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
4286   charset_kanji = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
4287   charset_kanji2 = NILP (val) ? NULL : CHARSET_FROM_ID (XINT (XCAR (val)));
4288
4289   ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
4290
4291   while (charbuf < charbuf_end)
4292     {
4293       ASSURE_DESTINATION (safe_room);
4294       c = *charbuf++;
4295       /* Now encode the character C.  */
4296       if (ASCII_CHAR_P (c) && ascii_compatible)
4297         EMIT_ONE_ASCII_BYTE (c);
4298       else if (CHAR_BYTE8_P (c))
4299         {
4300           c = CHAR_TO_BYTE8 (c);
4301           EMIT_ONE_BYTE (c);
4302         }
4303       else
4304         {
4305           unsigned code;
4306           struct charset *charset = char_charset (c, charset_list, &code);
4307
4308           if (!charset)
4309             {
4310               if (coding->mode & CODING_MODE_SAFE_ENCODING)
4311                 {
4312                   code = CODING_INHIBIT_CHARACTER_SUBSTITUTION;
4313                   charset = CHARSET_FROM_ID (charset_ascii);
4314                 }
4315               else
4316                 {
4317                   c = coding->default_char;
4318                   charset = char_charset (c, charset_list, &code);
4319                 }
4320             }
4321           if (code == CHARSET_INVALID_CODE (charset))
4322             abort ();
4323           if (charset == charset_kanji)
4324             {
4325               int c1, c2;
4326               JIS_TO_SJIS (code);
4327               c1 = code >> 8, c2 = code & 0xFF;
4328               EMIT_TWO_BYTES (c1, c2);
4329             }
4330           else if (charset == charset_kana)
4331             EMIT_ONE_BYTE (code | 0x80);
4332           else if (charset_kanji2 && charset == charset_kanji2)
4333             {
4334               int c1, c2;
4335
4336               c1 = code >> 8;
4337               if (c1 == 0x21 || (c1 >= 0x23 && c1 < 0x25)
4338                   || (c1 >= 0x2C && c1 <= 0x2F) || c1 >= 0x6E)
4339                 {
4340                   JIS_TO_SJIS2 (code);
4341                   c1 = code >> 8, c2 = code & 0xFF;
4342                   EMIT_TWO_BYTES (c1, c2);
4343                 }
4344               else
4345                 EMIT_ONE_ASCII_BYTE (code & 0x7F);
4346             }
4347           else
4348             EMIT_ONE_ASCII_BYTE (code & 0x7F);
4349         }
4350     }
4351   record_conversion_result (coding, CODING_RESULT_SUCCESS);
4352   coding->produced_char += produced_chars;
4353   coding->produced = dst - coding->destination;
4354   return 0;
4355 }
4356
4357 static int
4358 encode_coding_big5 (coding)
4359      struct coding_system *coding;
4360 {
4361   int multibytep = coding->dst_multibyte;
4362   int *charbuf = coding->charbuf;
4363   int *charbuf_end = charbuf + coding->charbuf_used;
4364   unsigned char *dst = coding->destination + coding->produced;
4365   unsigned char *dst_end = coding->destination + coding->dst_bytes;
4366   int safe_room = 4;
4367   int produced_chars = 0;
4368   Lisp_Object attrs, charset_list, val;
4369   int ascii_compatible;
4370   struct charset *charset_roman, *charset_big5;
4371   int c;
4372
4373   CODING_GET_INFO (coding, attrs, charset_list);
4374   val = charset_list;
4375   charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
4376   charset_big5 = CHARSET_FROM_ID (XINT (XCAR (val)));
4377   ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
4378
4379   while (charbuf < charbuf_end)
4380     {
4381       ASSURE_DESTINATION (safe_room);
4382       c = *charbuf++;
4383       /* Now encode the character C.  */
4384       if (ASCII_CHAR_P (c) && ascii_compatible)
4385         EMIT_ONE_ASCII_BYTE (c);
4386       else if (CHAR_BYTE8_P (c))
4387         {
4388           c = CHAR_TO_BYTE8 (c);
4389           EMIT_ONE_BYTE (c);
4390         }
4391       else
4392         {
4393           unsigned code;
4394           struct charset *charset = char_charset (c, charset_list, &code);
4395
4396           if (! charset)
4397             {
4398               if (coding->mode & CODING_MODE_SAFE_ENCODING)
4399                 {
4400                   code = CODING_INHIBIT_CHARACTER_SUBSTITUTION;
4401                   charset = CHARSET_FROM_ID (charset_ascii);
4402                 }
4403               else
4404                 {
4405                   c = coding->default_char;
4406                   charset = char_charset (c, charset_list, &code);
4407                 }
4408             }
4409           if (code == CHARSET_INVALID_CODE (charset))
4410             abort ();
4411           if (charset == charset_big5)
4412             {
4413               int c1, c2;
4414
4415               c1 = code >> 8, c2 = code & 0xFF;
4416               EMIT_TWO_BYTES (c1, c2);
4417             }
4418           else
4419             EMIT_ONE_ASCII_BYTE (code & 0x7F);
4420         }
4421     }
4422   record_conversion_result (coding, CODING_RESULT_SUCCESS);
4423   coding->produced_char += produced_chars;
4424   coding->produced = dst - coding->destination;
4425   return 0;
4426 }
4427
4428 \f
4429 /*** 10. CCL handlers ***/
4430
4431 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
4432    Check if a text is encoded in a coding system of which
4433    encoder/decoder are written in CCL program.  If it is, return
4434    CATEGORY_MASK_CCL, else return 0.  */
4435
4436 static int
4437 detect_coding_ccl (coding, detect_info)
4438      struct coding_system *coding;
4439      struct coding_detection_info *detect_info;
4440 {
4441   const unsigned char *src = coding->source, *src_base;
4442   const unsigned char *src_end = coding->source + coding->src_bytes;
4443   int multibytep = coding->src_multibyte;
4444   int consumed_chars = 0;
4445   int found = 0;
4446   unsigned char *valids;
4447   int head_ascii = coding->head_ascii;
4448   Lisp_Object attrs;
4449
4450   detect_info->checked |= CATEGORY_MASK_CCL;
4451
4452   coding = &coding_categories[coding_category_ccl];
4453   valids = CODING_CCL_VALIDS (coding);
4454   attrs = CODING_ID_ATTRS (coding->id);
4455   if (! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
4456     src += head_ascii;
4457
4458   while (1)
4459     {
4460       int c;
4461
4462       src_base = src;
4463       ONE_MORE_BYTE (c);
4464       if (c < 0 || ! valids[c])
4465         break;
4466       if ((valids[c] > 1))
4467         found = CATEGORY_MASK_CCL;
4468     }
4469   detect_info->rejected |= CATEGORY_MASK_CCL;
4470   return 0;
4471
4472  no_more_source:
4473   detect_info->found |= found;
4474   return 1;
4475 }
4476
4477 static void
4478 decode_coding_ccl (coding)
4479      struct coding_system *coding;
4480 {
4481   const unsigned char *src = coding->source + coding->consumed;
4482   const unsigned char *src_end = coding->source + coding->src_bytes;
4483   int *charbuf = coding->charbuf + coding->charbuf_used;
4484   int *charbuf_end = coding->charbuf + coding->charbuf_size;
4485   int consumed_chars = 0;
4486   int multibytep = coding->src_multibyte;
4487   struct ccl_program ccl;
4488   int source_charbuf[1024];
4489   int source_byteidx[1024];
4490   Lisp_Object attrs, charset_list;
4491
4492   CODING_GET_INFO (coding, attrs, charset_list);
4493   setup_ccl_program (&ccl, CODING_CCL_DECODER (coding));
4494
4495   while (src < src_end)
4496     {
4497       const unsigned char *p = src;
4498       int *source, *source_end;
4499       int i = 0;
4500
4501       if (multibytep)
4502         while (i < 1024 && p < src_end)
4503           {
4504             source_byteidx[i] = p - src;
4505             source_charbuf[i++] = STRING_CHAR_ADVANCE (p);
4506           }
4507       else
4508         while (i < 1024 && p < src_end)
4509           source_charbuf[i++] = *p++;
4510
4511       if (p == src_end && coding->mode & CODING_MODE_LAST_BLOCK)
4512         ccl.last_block = 1;
4513
4514       source = source_charbuf;
4515       source_end = source + i;
4516       while (source < source_end)
4517         {
4518           ccl_driver (&ccl, source, charbuf,
4519                       source_end - source, charbuf_end - charbuf,
4520                       charset_list);
4521           source += ccl.consumed;
4522           charbuf += ccl.produced;
4523           if (ccl.status != CCL_STAT_SUSPEND_BY_DST)
4524             break;
4525         }
4526       if (source < source_end)
4527         src += source_byteidx[source - source_charbuf];
4528       else
4529         src = p;
4530       consumed_chars += source - source_charbuf;
4531
4532       if (ccl.status != CCL_STAT_SUSPEND_BY_SRC
4533           && ccl.status != CODING_RESULT_INSUFFICIENT_SRC)
4534         break;
4535     }
4536
4537   switch (ccl.status)
4538     {
4539     case CCL_STAT_SUSPEND_BY_SRC:
4540       record_conversion_result (coding, CODING_RESULT_INSUFFICIENT_SRC);
4541       break;
4542     case CCL_STAT_SUSPEND_BY_DST:
4543       break;
4544     case CCL_STAT_QUIT:
4545     case CCL_STAT_INVALID_CMD:
4546       record_conversion_result (coding, CODING_RESULT_INTERRUPT);
4547       break;
4548     default:
4549       record_conversion_result (coding, CODING_RESULT_SUCCESS);
4550       break;
4551     }
4552   coding->consumed_char += consumed_chars;
4553   coding->consumed = src - coding->source;
4554   coding->charbuf_used = charbuf - coding->charbuf;
4555 }
4556
4557 static int
4558 encode_coding_ccl (coding)
4559      struct coding_system *coding;
4560 {
4561   struct ccl_program ccl;
4562   int multibytep = coding->dst_multibyte;
4563   int *charbuf = coding->charbuf;
4564   int *charbuf_end = charbuf + coding->charbuf_used;
4565   unsigned char *dst = coding->destination + coding->produced;
4566   unsigned char *dst_end = coding->destination + coding->dst_bytes;
4567   int destination_charbuf[1024];
4568   int i, produced_chars = 0;
4569   Lisp_Object attrs, charset_list;
4570
4571   CODING_GET_INFO (coding, attrs, charset_list);
4572   setup_ccl_program (&ccl, CODING_CCL_ENCODER (coding));
4573
4574   ccl.last_block = coding->mode & CODING_MODE_LAST_BLOCK;
4575   ccl.dst_multibyte = coding->dst_multibyte;
4576
4577   while (charbuf < charbuf_end)
4578     {
4579       ccl_driver (&ccl, charbuf, destination_charbuf,
4580                   charbuf_end - charbuf, 1024, charset_list);
4581       if (multibytep)
4582         {
4583           ASSURE_DESTINATION (ccl.produced * 2);
4584           for (i = 0; i < ccl.produced; i++)
4585             EMIT_ONE_BYTE (destination_charbuf[i] & 0xFF);
4586         }
4587       else
4588         {
4589           ASSURE_DESTINATION (ccl.produced);
4590           for (i = 0; i < ccl.produced; i++)
4591             *dst++ = destination_charbuf[i] & 0xFF;
4592           produced_chars += ccl.produced;
4593         }
4594       charbuf += ccl.consumed;
4595       if (ccl.status == CCL_STAT_QUIT
4596           || ccl.status == CCL_STAT_INVALID_CMD)
4597         break;
4598     }
4599
4600   switch (ccl.status)
4601     {
4602     case CCL_STAT_SUSPEND_BY_SRC:
4603       record_conversion_result (coding, CODING_RESULT_INSUFFICIENT_SRC);
4604       break;
4605     case CCL_STAT_SUSPEND_BY_DST:
4606       record_conversion_result (coding, CODING_RESULT_INSUFFICIENT_DST);
4607       break;
4608     case CCL_STAT_QUIT:
4609     case CCL_STAT_INVALID_CMD:
4610       record_conversion_result (coding, CODING_RESULT_INTERRUPT);
4611       break;
4612     default:
4613       record_conversion_result (coding, CODING_RESULT_SUCCESS);
4614       break;
4615     }
4616
4617   coding->produced_char += produced_chars;
4618   coding->produced = dst - coding->destination;
4619   return 0;
4620 }
4621
4622
4623 \f
4624 /*** 10, 11. no-conversion handlers ***/
4625
4626 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".  */
4627
4628 static void
4629 decode_coding_raw_text (coding)
4630      struct coding_system *coding;
4631 {
4632   coding->chars_at_source = 1;
4633   coding->consumed_char = 0;
4634   coding->consumed = 0;
4635   record_conversion_result (coding, CODING_RESULT_SUCCESS);
4636 }
4637
4638 static int
4639 encode_coding_raw_text (coding)
4640      struct coding_system *coding;
4641 {
4642   int multibytep = coding->dst_multibyte;
4643   int *charbuf = coding->charbuf;
4644   int *charbuf_end = coding->charbuf + coding->charbuf_used;
4645   unsigned char *dst = coding->destination + coding->produced;
4646   unsigned char *dst_end = coding->destination + coding->dst_bytes;
4647   int produced_chars = 0;
4648   int c;
4649
4650   if (multibytep)
4651     {
4652       int safe_room = MAX_MULTIBYTE_LENGTH * 2;
4653
4654       if (coding->src_multibyte)
4655         while (charbuf < charbuf_end)
4656           {
4657             ASSURE_DESTINATION (safe_room);
4658             c = *charbuf++;
4659             if (ASCII_CHAR_P (c))
4660               EMIT_ONE_ASCII_BYTE (c);
4661             else if (CHAR_BYTE8_P (c))
4662               {
4663                 c = CHAR_TO_BYTE8 (c);
4664                 EMIT_ONE_BYTE (c);
4665               }
4666             else
4667               {
4668                 unsigned char str[MAX_MULTIBYTE_LENGTH], *p0 = str, *p1 = str;
4669
4670                 CHAR_STRING_ADVANCE (c, p1);
4671                 while (p0 < p1)
4672                   {
4673                     EMIT_ONE_BYTE (*p0);
4674                     p0++;
4675                   }
4676               }
4677           }
4678       else
4679         while (charbuf < charbuf_end)
4680           {
4681             ASSURE_DESTINATION (safe_room);
4682             c = *charbuf++;
4683             EMIT_ONE_BYTE (c);
4684           }
4685     }
4686   else
4687     {
4688       if (coding->src_multibyte)
4689         {
4690           int safe_room = MAX_MULTIBYTE_LENGTH;
4691
4692           while (charbuf < charbuf_end)
4693             {
4694               ASSURE_DESTINATION (safe_room);
4695               c = *charbuf++;
4696               if (ASCII_CHAR_P (c))
4697                 *dst++ = c;
4698               else if (CHAR_BYTE8_P (c))
4699                 *dst++ = CHAR_TO_BYTE8 (c);
4700               else
4701                 CHAR_STRING_ADVANCE (c, dst);
4702               produced_chars++;
4703             }
4704         }
4705       else
4706         {
4707           ASSURE_DESTINATION (charbuf_end - charbuf);
4708           while (charbuf < charbuf_end && dst < dst_end)
4709             *dst++ = *charbuf++;
4710           produced_chars = dst - (coding->destination + coding->dst_bytes);
4711         }
4712     }
4713   record_conversion_result (coding, CODING_RESULT_SUCCESS);
4714   coding->produced_char += produced_chars;
4715   coding->produced = dst - coding->destination;
4716   return 0;
4717 }
4718
4719 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
4720    Check if a text is encoded in a charset-based coding system.  If it
4721    is, return 1, else return 0.  */
4722
4723 static int
4724 detect_coding_charset (coding, detect_info)
4725      struct coding_system *coding;
4726      struct coding_detection_info *detect_info;
4727 {
4728   const unsigned char *src = coding->source, *src_base;
4729   const unsigned char *src_end = coding->source + coding->src_bytes;
4730   int multibytep = coding->src_multibyte;
4731   int consumed_chars = 0;
4732   Lisp_Object attrs, valids;
4733   int found = 0;
4734   int head_ascii = coding->head_ascii;
4735
4736   detect_info->checked |= CATEGORY_MASK_CHARSET;
4737
4738   coding = &coding_categories[coding_category_charset];
4739   attrs = CODING_ID_ATTRS (coding->id);
4740   valids = AREF (attrs, coding_attr_charset_valids);
4741
4742   if (! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
4743     src += head_ascii;
4744
4745   while (1)
4746     {
4747       int c;
4748       Lisp_Object val;
4749       struct charset *charset;
4750       int dim, idx;
4751
4752       src_base = src;
4753       ONE_MORE_BYTE (c);
4754       if (c < 0)
4755         continue;
4756       val = AREF (valids, c);
4757       if (NILP (val))
4758         break;
4759       if (c >= 0x80)
4760         found = CATEGORY_MASK_CHARSET;
4761       if (INTEGERP (val))
4762         {
4763           charset = CHARSET_FROM_ID (XFASTINT (val));
4764           dim = CHARSET_DIMENSION (charset);
4765           for (idx = 1; idx < dim; idx++)
4766             {
4767               if (src == src_end)
4768                 goto too_short;
4769               ONE_MORE_BYTE (c);
4770               if (c < charset->code_space[(dim - 1 - idx) * 2]
4771                   || c > charset->code_space[(dim - 1 - idx) * 2 + 1])
4772                 break;
4773             }
4774           if (idx < dim)
4775             break;
4776         }
4777       else
4778         {
4779           idx = 1;
4780           for (; CONSP (val); val = XCDR (val))
4781             {
4782               charset = CHARSET_FROM_ID (XFASTINT (XCAR (val)));
4783               dim = CHARSET_DIMENSION (charset);
4784               while (idx < dim)
4785                 {
4786                   if (src == src_end)
4787                     goto too_short;
4788                   ONE_MORE_BYTE (c);
4789                   if (c < charset->code_space[(dim - 1 - idx) * 4]
4790                       || c > charset->code_space[(dim - 1 - idx) * 4 + 1])
4791                     break;
4792                   idx++;
4793                 }
4794               if (idx == dim)
4795                 {
4796                   val = Qnil;
4797                   break;
4798                 }
4799             }
4800           if (CONSP (val))
4801             break;
4802         }
4803     }
4804  too_short:
4805   detect_info->rejected |= CATEGORY_MASK_CHARSET;
4806   return 0;
4807
4808  no_more_source:
4809   detect_info->found |= found;
4810   return 1;
4811 }
4812
4813 static void
4814 decode_coding_charset (coding)
4815      struct coding_system *coding;
4816 {
4817   const unsigned char *src = coding->source + coding->consumed;
4818   const unsigned char *src_end = coding->source + coding->src_bytes;
4819   const unsigned char *src_base;
4820   int *charbuf = coding->charbuf + coding->charbuf_used;
4821   int *charbuf_end
4822     = coding->charbuf + coding->charbuf_size - MAX_ANNOTATION_LENGTH;
4823   int consumed_chars = 0, consumed_chars_base;
4824   int multibytep = coding->src_multibyte;
4825   Lisp_Object attrs, charset_list, valids;
4826   int char_offset = coding->produced_char;
4827   int last_offset = char_offset;
4828   int last_id = charset_ascii;
4829
4830   CODING_GET_INFO (coding, attrs, charset_list);
4831   valids = AREF (attrs, coding_attr_charset_valids);
4832
4833   while (1)
4834     {
4835       int c;
4836       Lisp_Object val;
4837       struct charset *charset;
4838       int dim;
4839       int len = 1;
4840       unsigned code;
4841
4842       src_base = src;
4843       consumed_chars_base = consumed_chars;
4844
4845       if (charbuf >= charbuf_end)
4846         break;
4847
4848       ONE_MORE_BYTE (c);
4849       if (c < 0)
4850         goto invalid_code;
4851       code = c;
4852
4853       val = AREF (valids, c);
4854       if (NILP (val))
4855         goto invalid_code;
4856       if (INTEGERP (val))
4857         {
4858           charset = CHARSET_FROM_ID (XFASTINT (val));
4859           dim = CHARSET_DIMENSION (charset);
4860           while (len < dim)
4861             {
4862               ONE_MORE_BYTE (c);
4863               code = (code << 8) | c;
4864               len++;
4865             }
4866           CODING_DECODE_CHAR (coding, src, src_base, src_end,
4867                               charset, code, c);
4868         }
4869       else
4870         {
4871           /* VAL is a list of charset IDs.  It is assured that the
4872              list is sorted by charset dimensions (smaller one
4873              comes first).  */
4874           while (CONSP (val))
4875             {
4876               charset = CHARSET_FROM_ID (XFASTINT (XCAR (val)));
4877               dim = CHARSET_DIMENSION (charset);
4878               while (len < dim)
4879                 {
4880                   ONE_MORE_BYTE (c);
4881                   code = (code << 8) | c;
4882                   len++;
4883                 }
4884               CODING_DECODE_CHAR (coding, src, src_base,
4885                                   src_end, charset, code, c);
4886               if (c >= 0)
4887                 break;
4888               val = XCDR (val);
4889             }
4890         }
4891       if (c < 0)
4892         goto invalid_code;
4893       if (charset->id != charset_ascii
4894           && last_id != charset->id)
4895         {
4896           if (last_id != charset_ascii)
4897             ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
4898           last_id = charset->id;
4899           last_offset = char_offset;
4900         }
4901
4902       *charbuf++ = c;
4903       char_offset++;
4904       continue;
4905
4906     invalid_code:
4907       src = src_base;
4908       consumed_chars = consumed_chars_base;
4909       ONE_MORE_BYTE (c);
4910       *charbuf++ = c < 0 ? -c : ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
4911       char_offset++;
4912       coding->errors++;
4913     }
4914
4915  no_more_source:
4916   if (last_id != charset_ascii)
4917     ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
4918   coding->consumed_char += consumed_chars_base;
4919   coding->consumed = src_base - coding->source;
4920   coding->charbuf_used = charbuf - coding->charbuf;
4921 }
4922
4923 static int
4924 encode_coding_charset (coding)
4925      struct coding_system *coding;
4926 {
4927   int multibytep = coding->dst_multibyte;
4928   int *charbuf = coding->charbuf;
4929   int *charbuf_end = charbuf + coding->charbuf_used;
4930   unsigned char *dst = coding->destination + coding->produced;
4931   unsigned char *dst_end = coding->destination + coding->dst_bytes;
4932   int safe_room = MAX_MULTIBYTE_LENGTH;
4933   int produced_chars = 0;
4934   Lisp_Object attrs, charset_list;
4935   int ascii_compatible;
4936   int c;
4937
4938   CODING_GET_INFO (coding, attrs, charset_list);
4939   ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
4940
4941   while (charbuf < charbuf_end)
4942     {
4943       struct charset *charset;
4944       unsigned code;
4945
4946       ASSURE_DESTINATION (safe_room);
4947       c = *charbuf++;
4948       if (ascii_compatible && ASCII_CHAR_P (c))
4949         EMIT_ONE_ASCII_BYTE (c);
4950       else if (CHAR_BYTE8_P (c))
4951         {
4952           c = CHAR_TO_BYTE8 (c);
4953           EMIT_ONE_BYTE (c);
4954         }
4955       else
4956         {
4957           charset = char_charset (c, charset_list, &code);
4958           if (charset)
4959             {
4960               if (CHARSET_DIMENSION (charset) == 1)
4961                 EMIT_ONE_BYTE (code);
4962               else if (CHARSET_DIMENSION (charset) == 2)
4963                 EMIT_TWO_BYTES (code >> 8, code & 0xFF);
4964               else if (CHARSET_DIMENSION (charset) == 3)
4965                 EMIT_THREE_BYTES (code >> 16, (code >> 8) & 0xFF, code & 0xFF);
4966               else
4967                 EMIT_FOUR_BYTES (code >> 24, (code >> 16) & 0xFF,
4968                                  (code >> 8) & 0xFF, code & 0xFF);
4969             }
4970           else
4971             {
4972               if (coding->mode & CODING_MODE_SAFE_ENCODING)
4973                 c = CODING_INHIBIT_CHARACTER_SUBSTITUTION;
4974               else
4975                 c = coding->default_char;
4976               EMIT_ONE_BYTE (c);
4977             }
4978         }
4979     }
4980
4981   record_conversion_result (coding, CODING_RESULT_SUCCESS);
4982   coding->produced_char += produced_chars;
4983   coding->produced = dst - coding->destination;
4984   return 0;
4985 }
4986
4987 \f
4988 /*** 7. C library functions ***/
4989
4990 /* Setup coding context CODING from information about CODING_SYSTEM.
4991    If CODING_SYSTEM is nil, `no-conversion' is assumed.  If
4992    CODING_SYSTEM is invalid, signal an error.  */
4993
4994 void
4995 setup_coding_system (coding_system, coding)
4996      Lisp_Object coding_system;
4997      struct coding_system *coding;
4998 {
4999   Lisp_Object attrs;
5000   Lisp_Object eol_type;
5001   Lisp_Object coding_type;
5002   Lisp_Object val;
5003
5004   if (NILP (coding_system))
5005     coding_system = Qundecided;
5006
5007   CHECK_CODING_SYSTEM_GET_ID (coding_system, coding->id);
5008
5009   attrs = CODING_ID_ATTRS (coding->id);
5010   eol_type = CODING_ID_EOL_TYPE (coding->id);
5011
5012   coding->mode = 0;
5013   coding->head_ascii = -1;
5014   if (VECTORP (eol_type))
5015     coding->common_flags = (CODING_REQUIRE_DECODING_MASK
5016                             | CODING_REQUIRE_DETECTION_MASK);
5017   else if (! EQ (eol_type, Qunix))
5018     coding->common_flags = (CODING_REQUIRE_DECODING_MASK
5019                             | CODING_REQUIRE_ENCODING_MASK);
5020   else
5021     coding->common_flags = 0;
5022   if (! NILP (CODING_ATTR_POST_READ (attrs)))
5023     coding->common_flags |= CODING_REQUIRE_DECODING_MASK;
5024   if (! NILP (CODING_ATTR_PRE_WRITE (attrs)))
5025     coding->common_flags |= CODING_REQUIRE_ENCODING_MASK;
5026   if (! NILP (CODING_ATTR_FOR_UNIBYTE (attrs)))
5027     coding->common_flags |= CODING_FOR_UNIBYTE_MASK;
5028
5029   val = CODING_ATTR_SAFE_CHARSETS (attrs);
5030   coding->max_charset_id = SCHARS (val) - 1;
5031   coding->safe_charsets = (char *) SDATA (val);
5032   coding->default_char = XINT (CODING_ATTR_DEFAULT_CHAR (attrs));
5033
5034   coding_type = CODING_ATTR_TYPE (attrs);
5035   if (EQ (coding_type, Qundecided))
5036     {
5037       coding->detector = NULL;
5038       coding->decoder = decode_coding_raw_text;
5039       coding->encoder = encode_coding_raw_text;
5040       coding->common_flags |= CODING_REQUIRE_DETECTION_MASK;
5041     }
5042   else if (EQ (coding_type, Qiso_2022))
5043     {
5044       int i;
5045       int flags = XINT (AREF (attrs, coding_attr_iso_flags));
5046
5047       /* Invoke graphic register 0 to plane 0.  */
5048       CODING_ISO_INVOCATION (coding, 0) = 0;
5049       /* Invoke graphic register 1 to plane 1 if we can use 8-bit.  */
5050       CODING_ISO_INVOCATION (coding, 1)
5051         = (flags & CODING_ISO_FLAG_SEVEN_BITS ? -1 : 1);
5052       /* Setup the initial status of designation.  */
5053       for (i = 0; i < 4; i++)
5054         CODING_ISO_DESIGNATION (coding, i) = CODING_ISO_INITIAL (coding, i);
5055       /* Not single shifting initially.  */
5056       CODING_ISO_SINGLE_SHIFTING (coding) = 0;
5057       /* Beginning of buffer should also be regarded as bol. */
5058       CODING_ISO_BOL (coding) = 1;
5059       coding->detector = detect_coding_iso_2022;
5060       coding->decoder = decode_coding_iso_2022;
5061       coding->encoder = encode_coding_iso_2022;
5062       if (flags & CODING_ISO_FLAG_SAFE)
5063         coding->mode |= CODING_MODE_SAFE_ENCODING;
5064       coding->common_flags
5065         |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK
5066             | CODING_REQUIRE_FLUSHING_MASK);
5067       if (flags & CODING_ISO_FLAG_COMPOSITION)
5068         coding->common_flags |= CODING_ANNOTATE_COMPOSITION_MASK;
5069       if (flags & CODING_ISO_FLAG_DESIGNATION)
5070         coding->common_flags |= CODING_ANNOTATE_CHARSET_MASK;
5071       if (flags & CODING_ISO_FLAG_FULL_SUPPORT)
5072         {
5073           setup_iso_safe_charsets (attrs);
5074           val = CODING_ATTR_SAFE_CHARSETS (attrs);
5075           coding->max_charset_id = SCHARS (val) - 1;
5076           coding->safe_charsets = (char *) SDATA (val);
5077         }
5078       CODING_ISO_FLAGS (coding) = flags;
5079     }
5080   else if (EQ (coding_type, Qcharset))
5081     {
5082       coding->detector = detect_coding_charset;
5083       coding->decoder = decode_coding_charset;
5084       coding->encoder = encode_coding_charset;
5085       coding->common_flags
5086         |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
5087     }
5088   else if (EQ (coding_type, Qutf_8))
5089     {
5090       coding->detector = detect_coding_utf_8;
5091       coding->decoder = decode_coding_utf_8;
5092       coding->encoder = encode_coding_utf_8;
5093       coding->common_flags
5094         |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
5095     }
5096   else if (EQ (coding_type, Qutf_16))
5097     {
5098       val = AREF (attrs, coding_attr_utf_16_bom);
5099       CODING_UTF_16_BOM (coding) = (CONSP (val) ? utf_16_detect_bom
5100                                     : EQ (val, Qt) ? utf_16_with_bom
5101                                     : utf_16_without_bom);
5102       val = AREF (attrs, coding_attr_utf_16_endian);
5103       CODING_UTF_16_ENDIAN (coding) = (EQ (val, Qbig) ? utf_16_big_endian
5104                                        : utf_16_little_endian);
5105       CODING_UTF_16_SURROGATE (coding) = 0;
5106       coding->detector = detect_coding_utf_16;
5107       coding->decoder = decode_coding_utf_16;
5108       coding->encoder = encode_coding_utf_16;
5109       coding->common_flags
5110         |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
5111       if (CODING_UTF_16_BOM (coding) == utf_16_detect_bom)
5112         coding->common_flags |= CODING_REQUIRE_DETECTION_MASK;
5113     }
5114   else if (EQ (coding_type, Qccl))
5115     {
5116       coding->detector = detect_coding_ccl;
5117       coding->decoder = decode_coding_ccl;
5118       coding->encoder = encode_coding_ccl;
5119       coding->common_flags
5120         |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK
5121             | CODING_REQUIRE_FLUSHING_MASK);
5122     }
5123   else if (EQ (coding_type, Qemacs_mule))
5124     {
5125       coding->detector = detect_coding_emacs_mule;
5126       coding->decoder = decode_coding_emacs_mule;
5127       coding->encoder = encode_coding_emacs_mule;
5128       coding->common_flags
5129         |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
5130       if (! NILP (AREF (attrs, coding_attr_emacs_mule_full))
5131           && ! EQ (CODING_ATTR_CHARSET_LIST (attrs), Vemacs_mule_charset_list))
5132         {
5133           Lisp_Object tail, safe_charsets;
5134           int max_charset_id = 0;
5135
5136           for (tail = Vemacs_mule_charset_list; CONSP (tail);
5137                tail = XCDR (tail))
5138             if (max_charset_id < XFASTINT (XCAR (tail)))
5139               max_charset_id = XFASTINT (XCAR (tail));
5140           safe_charsets = Fmake_string (make_number (max_charset_id + 1),
5141                                         make_number (255));
5142           for (tail = Vemacs_mule_charset_list; CONSP (tail);
5143                tail = XCDR (tail))
5144             SSET (safe_charsets, XFASTINT (XCAR (tail)), 0);
5145           coding->max_charset_id = max_charset_id;
5146           coding->safe_charsets = (char *) SDATA (safe_charsets);
5147         }
5148     }
5149   else if (EQ (coding_type, Qshift_jis))
5150     {
5151       coding->detector = detect_coding_sjis;
5152       coding->decoder = decode_coding_sjis;
5153       coding->encoder = encode_coding_sjis;
5154       coding->common_flags
5155         |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
5156     }
5157   else if (EQ (coding_type, Qbig5))
5158     {
5159       coding->detector = detect_coding_big5;
5160       coding->decoder = decode_coding_big5;
5161       coding->encoder = encode_coding_big5;
5162       coding->common_flags
5163         |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
5164     }
5165   else                          /* EQ (coding_type, Qraw_text) */
5166     {
5167       coding->detector = NULL;
5168       coding->decoder = decode_coding_raw_text;
5169       coding->encoder = encode_coding_raw_text;
5170       if (! EQ (eol_type, Qunix))
5171         {
5172           coding->common_flags |= CODING_REQUIRE_DECODING_MASK;
5173           if (! VECTORP (eol_type))
5174             coding->common_flags |= CODING_REQUIRE_ENCODING_MASK;
5175         }
5176
5177     }
5178
5179   return;
5180 }
5181
5182 /* Return a list of charsets supported by CODING.  */
5183
5184 Lisp_Object
5185 coding_charset_list (coding)
5186      struct coding_system *coding;
5187 {
5188   Lisp_Object attrs, charset_list;
5189
5190   CODING_GET_INFO (coding, attrs, charset_list);
5191   if (EQ (CODING_ATTR_TYPE (attrs), Qiso_2022))
5192     {
5193       int flags = XINT (AREF (attrs, coding_attr_iso_flags));
5194
5195       if (flags & CODING_ISO_FLAG_FULL_SUPPORT)
5196         charset_list = Viso_2022_charset_list;
5197     }
5198   else if (EQ (CODING_ATTR_TYPE (attrs), Qemacs_mule))
5199     {
5200       charset_list = Vemacs_mule_charset_list;
5201     }
5202   return charset_list;
5203 }
5204
5205
5206 /* Return raw-text or one of its subsidiaries that has the same
5207    eol_type as CODING-SYSTEM.  */
5208
5209 Lisp_Object
5210 raw_text_coding_system (coding_system)
5211      Lisp_Object coding_system;
5212 {
5213   Lisp_Object spec, attrs;
5214   Lisp_Object eol_type, raw_text_eol_type;
5215
5216   if (NILP (coding_system))
5217     return Qraw_text;
5218   spec = CODING_SYSTEM_SPEC (coding_system);
5219   attrs = AREF (spec, 0);
5220
5221   if (EQ (CODING_ATTR_TYPE (attrs), Qraw_text))
5222     return coding_system;
5223
5224   eol_type = AREF (spec, 2);
5225   if (VECTORP (eol_type))
5226     return Qraw_text;
5227   spec = CODING_SYSTEM_SPEC (Qraw_text);
5228   raw_text_eol_type = AREF (spec, 2);
5229   return (EQ (eol_type, Qunix) ? AREF (raw_text_eol_type, 0)
5230           : EQ (eol_type, Qdos) ? AREF (raw_text_eol_type, 1)
5231           : AREF (raw_text_eol_type, 2));
5232 }
5233
5234
5235 /* If CODING_SYSTEM doesn't specify end-of-line format but PARENT
5236    does, return one of the subsidiary that has the same eol-spec as
5237    PARENT.  Otherwise, return CODING_SYSTEM.  If PARENT is nil,
5238    inherit end-of-line format from the system's setting
5239    (system_eol_type).  */
5240
5241 Lisp_Object
5242 coding_inherit_eol_type (coding_system, parent)
5243      Lisp_Object coding_system, parent;
5244 {
5245   Lisp_Object spec, eol_type;
5246
5247   if (NILP (coding_system))
5248     coding_system = Qraw_text;
5249   spec = CODING_SYSTEM_SPEC (coding_system);
5250   eol_type = AREF (spec, 2);
5251   if (VECTORP (eol_type))
5252     {
5253       Lisp_Object parent_eol_type;
5254
5255       if (! NILP (parent))
5256         {
5257           Lisp_Object parent_spec;
5258
5259           parent_spec = CODING_SYSTEM_SPEC (parent);
5260           parent_eol_type = AREF (parent_spec, 2);
5261         }
5262       else
5263         parent_eol_type = system_eol_type;
5264       if (EQ (parent_eol_type, Qunix))
5265         coding_system = AREF (eol_type, 0);
5266       else if (EQ (parent_eol_type, Qdos))
5267         coding_system = AREF (eol_type, 1);
5268       else if (EQ (parent_eol_type, Qmac))
5269         coding_system = AREF (eol_type, 2);
5270     }
5271   return coding_system;
5272 }
5273
5274 /* Emacs has a mechanism to automatically detect a coding system if it
5275    is one of Emacs' internal format, ISO2022, SJIS, and BIG5.  But,
5276    it's impossible to distinguish some coding systems accurately
5277    because they use the same range of codes.  So, at first, coding
5278    systems are categorized into 7, those are:
5279
5280    o coding-category-emacs-mule
5281
5282         The category for a coding system which has the same code range
5283         as Emacs' internal format.  Assigned the coding-system (Lisp
5284         symbol) `emacs-mule' by default.
5285
5286    o coding-category-sjis
5287
5288         The category for a coding system which has the same code range
5289         as SJIS.  Assigned the coding-system (Lisp
5290         symbol) `japanese-shift-jis' by default.
5291
5292    o coding-category-iso-7
5293
5294         The category for a coding system which has the same code range
5295         as ISO2022 of 7-bit environment.  This doesn't use any locking
5296         shift and single shift functions.  This can encode/decode all
5297         charsets.  Assigned the coding-system (Lisp symbol)
5298         `iso-2022-7bit' by default.
5299
5300    o coding-category-iso-7-tight
5301
5302         Same as coding-category-iso-7 except that this can
5303         encode/decode only the specified charsets.
5304
5305    o coding-category-iso-8-1
5306
5307         The category for a coding system which has the same code range
5308         as ISO2022 of 8-bit environment and graphic plane 1 used only
5309         for DIMENSION1 charset.  This doesn't use any locking shift
5310         and single shift functions.  Assigned the coding-system (Lisp
5311         symbol) `iso-latin-1' by default.
5312
5313    o coding-category-iso-8-2
5314
5315         The category for a coding system which has the same code range
5316         as ISO2022 of 8-bit environment and graphic plane 1 used only
5317         for DIMENSION2 charset.  This doesn't use any locking shift
5318         and single shift functions.  Assigned the coding-system (Lisp
5319         symbol) `japanese-iso-8bit' by default.
5320
5321    o coding-category-iso-7-else
5322
5323         The category for a coding system which has the same code range
5324         as ISO2022 of 7-bit environemnt but uses locking shift or
5325         single shift functions.  Assigned the coding-system (Lisp
5326         symbol) `iso-2022-7bit-lock' by default.
5327
5328    o coding-category-iso-8-else
5329
5330         The category for a coding system which has the same code range
5331         as ISO2022 of 8-bit environemnt but uses locking shift or
5332         single shift functions.  Assigned the coding-system (Lisp
5333         symbol) `iso-2022-8bit-ss2' by default.
5334
5335    o coding-category-big5
5336
5337         The category for a coding system which has the same code range
5338         as BIG5.  Assigned the coding-system (Lisp symbol)
5339         `cn-big5' by default.
5340
5341    o coding-category-utf-8
5342
5343         The category for a coding system which has the same code range
5344         as UTF-8 (cf. RFC3629).  Assigned the coding-system (Lisp
5345         symbol) `utf-8' by default.
5346
5347    o coding-category-utf-16-be
5348
5349         The category for a coding system in which a text has an
5350         Unicode signature (cf. Unicode Standard) in the order of BIG
5351         endian at the head.  Assigned the coding-system (Lisp symbol)
5352         `utf-16-be' by default.
5353
5354    o coding-category-utf-16-le
5355
5356         The category for a coding system in which a text has an
5357         Unicode signature (cf. Unicode Standard) in the order of
5358         LITTLE endian at the head.  Assigned the coding-system (Lisp
5359         symbol) `utf-16-le' by default.
5360
5361    o coding-category-ccl
5362
5363         The category for a coding system of which encoder/decoder is
5364         written in CCL programs.  The default value is nil, i.e., no
5365         coding system is assigned.
5366
5367    o coding-category-binary
5368
5369         The category for a coding system not categorized in any of the
5370         above.  Assigned the coding-system (Lisp symbol)
5371         `no-conversion' by default.
5372
5373    Each of them is a Lisp symbol and the value is an actual
5374    `coding-system's (this is also a Lisp symbol) assigned by a user.
5375    What Emacs does actually is to detect a category of coding system.
5376    Then, it uses a `coding-system' assigned to it.  If Emacs can't
5377    decide only one possible category, it selects a category of the
5378    highest priority.  Priorities of categories are also specified by a
5379    user in a Lisp variable `coding-category-list'.
5380
5381 */
5382
5383 #define EOL_SEEN_NONE   0
5384 #define EOL_SEEN_LF     1
5385 #define EOL_SEEN_CR     2
5386 #define EOL_SEEN_CRLF   4
5387
5388 /* Detect how end-of-line of a text of length SRC_BYTES pointed by
5389    SOURCE is encoded.  If CATEGORY is one of
5390    coding_category_utf_16_XXXX, assume that CR and LF are encoded by
5391    two-byte, else they are encoded by one-byte.
5392
5393    Return one of EOL_SEEN_XXX.  */
5394
5395 #define MAX_EOL_CHECK_COUNT 3
5396
5397 static int
5398 detect_eol (source, src_bytes, category)
5399      const unsigned char *source;
5400      EMACS_INT src_bytes;
5401      enum coding_category category;
5402 {
5403   const unsigned char *src = source, *src_end = src + src_bytes;
5404   unsigned char c;
5405   int total  = 0;
5406   int eol_seen = EOL_SEEN_NONE;
5407
5408   if ((1 << category) & CATEGORY_MASK_UTF_16)
5409     {
5410       int msb, lsb;
5411
5412       msb = category == (coding_category_utf_16_le
5413                          | coding_category_utf_16_le_nosig);
5414       lsb = 1 - msb;
5415
5416       while (src + 1 < src_end)
5417         {
5418           c = src[lsb];
5419           if (src[msb] == 0 && (c == '\n' || c == '\r'))
5420             {
5421               int this_eol;
5422
5423               if (c == '\n')
5424                 this_eol = EOL_SEEN_LF;
5425               else if (src + 3 >= src_end
5426                        || src[msb + 2] != 0
5427                        || src[lsb + 2] != '\n')
5428                 this_eol = EOL_SEEN_CR;
5429               else
5430                 this_eol = EOL_SEEN_CRLF;
5431
5432               if (eol_seen == EOL_SEEN_NONE)
5433                 /* This is the first end-of-line.  */
5434                 eol_seen = this_eol;
5435               else if (eol_seen != this_eol)
5436                 {
5437                   /* The found type is different from what found before.  */
5438                   eol_seen = EOL_SEEN_LF;
5439                   break;
5440                 }
5441               if (++total == MAX_EOL_CHECK_COUNT)
5442                 break;
5443             }
5444           src += 2;
5445         }
5446     }
5447   else
5448     {
5449       while (src < src_end)
5450         {
5451           c = *src++;
5452           if (c == '\n' || c == '\r')
5453             {
5454               int this_eol;
5455
5456               if (c == '\n')
5457                 this_eol = EOL_SEEN_LF;
5458               else if (src >= src_end || *src != '\n')
5459                 this_eol = EOL_SEEN_CR;
5460               else
5461                 this_eol = EOL_SEEN_CRLF, src++;
5462
5463               if (eol_seen == EOL_SEEN_NONE)
5464                 /* This is the first end-of-line.  */
5465                 eol_seen = this_eol;
5466               else if (eol_seen != this_eol)
5467                 {
5468                   /* The found type is different from what found before.  */
5469                   eol_seen = EOL_SEEN_LF;
5470                   break;
5471                 }
5472               if (++total == MAX_EOL_CHECK_COUNT)
5473                 break;
5474             }
5475         }
5476     }
5477   return eol_seen;
5478 }
5479
5480
5481 static Lisp_Object
5482 adjust_coding_eol_type (coding, eol_seen)
5483      struct coding_system *coding;
5484      int eol_seen;
5485 {
5486   Lisp_Object eol_type;
5487
5488   eol_type = CODING_ID_EOL_TYPE (coding->id);
5489   if (eol_seen & EOL_SEEN_LF)
5490     {
5491       coding->id = CODING_SYSTEM_ID (AREF (eol_type, 0));
5492       eol_type = Qunix;
5493     }
5494   else if (eol_seen & EOL_SEEN_CRLF)
5495     {
5496       coding->id = CODING_SYSTEM_ID (AREF (eol_type, 1));
5497       eol_type = Qdos;
5498     }
5499   else if (eol_seen & EOL_SEEN_CR)
5500     {
5501       coding->id = CODING_SYSTEM_ID (AREF (eol_type, 2));
5502       eol_type = Qmac;
5503     }
5504   return eol_type;
5505 }
5506
5507 /* Detect how a text specified in CODING is encoded.  If a coding
5508    system is detected, update fields of CODING by the detected coding
5509    system.  */
5510
5511 void
5512 detect_coding (coding)
5513      struct coding_system *coding;
5514 {
5515   const unsigned char *src, *src_end;
5516
5517   coding->consumed = coding->consumed_char = 0;
5518   coding->produced = coding->produced_char = 0;
5519   coding_set_source (coding);
5520
5521   src_end = coding->source + coding->src_bytes;
5522
5523   /* If we have not yet decided the text encoding type, detect it
5524      now.  */
5525   if (EQ (CODING_ATTR_TYPE (CODING_ID_ATTRS (coding->id)), Qundecided))
5526     {
5527       int c, i;
5528       struct coding_detection_info detect_info;
5529
5530       detect_info.checked = detect_info.found = detect_info.rejected = 0;
5531       for (i = 0, src = coding->source; src < src_end; i++, src++)
5532         {
5533           c = *src;
5534           if (c & 0x80)
5535             break;
5536           if (c < 0x20
5537               && (c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO)
5538               && ! inhibit_iso_escape_detection
5539               && ! detect_info.checked)
5540             {
5541               coding->head_ascii = src - (coding->source + coding->consumed);
5542               if (detect_coding_iso_2022 (coding, &detect_info))
5543                 {
5544                   /* We have scanned the whole data.  */
5545                   if (! (detect_info.rejected & CATEGORY_MASK_ISO_7_ELSE))
5546                     /* We didn't find an 8-bit code.  */
5547                     src = src_end;
5548                   break;
5549                 }
5550             }
5551         }
5552       coding->head_ascii = src - (coding->source + coding->consumed);
5553
5554       if (coding->head_ascii < coding->src_bytes
5555           || detect_info.found)
5556         {
5557           enum coding_category category;
5558           struct coding_system *this;
5559
5560           if (coding->head_ascii == coding->src_bytes)
5561             /* As all bytes are 7-bit, we can ignore non-ISO-2022 codings.  */
5562             for (i = 0; i < coding_category_raw_text; i++)
5563               {
5564                 category = coding_priorities[i];
5565                 this = coding_categories + category;
5566                 if (detect_info.found & (1 << category))
5567                   break;
5568               }
5569           else
5570             for (i = 0; i < coding_category_raw_text; i++)
5571               {
5572                 category = coding_priorities[i];
5573                 this = coding_categories + category;
5574                 if (this->id < 0)
5575                   {
5576                     /* No coding system of this category is defined.  */
5577                     detect_info.rejected |= (1 << category);
5578                   }
5579                 else if (category >= coding_category_raw_text)
5580                   continue;
5581                 else if (detect_info.checked & (1 << category))
5582                   {
5583                     if (detect_info.found & (1 << category))
5584                       break;
5585                   }
5586                 else if ((*(this->detector)) (coding, &detect_info)
5587                          && detect_info.found & (1 << category))
5588                   {
5589                     if (category == coding_category_utf_16_auto)
5590                       {
5591                         if (detect_info.found & CATEGORY_MASK_UTF_16_LE)
5592                           category = coding_category_utf_16_le;
5593                         else
5594                           category = coding_category_utf_16_be;
5595                       }
5596                     break;
5597                   }
5598               }
5599
5600           if (i < coding_category_raw_text)
5601             setup_coding_system (CODING_ID_NAME (this->id), coding);
5602           else if (detect_info.rejected == CATEGORY_MASK_ANY)
5603             setup_coding_system (Qraw_text, coding);
5604           else if (detect_info.rejected)
5605             for (i = 0; i < coding_category_raw_text; i++)
5606               if (! (detect_info.rejected & (1 << coding_priorities[i])))
5607                 {
5608                   this = coding_categories + coding_priorities[i];
5609                   setup_coding_system (CODING_ID_NAME (this->id), coding);
5610                   break;
5611                 }
5612         }
5613     }
5614   else if (XINT (CODING_ATTR_CATEGORY (CODING_ID_ATTRS (coding->id)))
5615            == coding_category_utf_16_auto)
5616     {
5617       Lisp_Object coding_systems;
5618       struct coding_detection_info detect_info;
5619
5620       coding_systems
5621         = AREF (CODING_ID_ATTRS (coding->id), coding_attr_utf_16_bom);
5622       detect_info.found = detect_info.rejected = 0;
5623       if (CONSP (coding_systems)
5624           && detect_coding_utf_16 (coding, &detect_info))
5625         {
5626           if (detect_info.found & CATEGORY_MASK_UTF_16_LE)
5627             setup_coding_system (XCAR (coding_systems), coding);
5628           else if (detect_info.found & CATEGORY_MASK_UTF_16_BE)
5629             setup_coding_system (XCDR (coding_systems), coding);
5630         }
5631     }
5632 }
5633
5634
5635 static void
5636 decode_eol (coding)
5637      struct coding_system *coding;
5638 {
5639   Lisp_Object eol_type;
5640   unsigned char *p, *pbeg, *pend;
5641
5642   eol_type = CODING_ID_EOL_TYPE (coding->id);
5643   if (EQ (eol_type, Qunix))
5644     return;
5645
5646   if (NILP (coding->dst_object))
5647     pbeg = coding->destination;
5648   else
5649     pbeg = BYTE_POS_ADDR (coding->dst_pos_byte);
5650   pend = pbeg + coding->produced;
5651
5652   if (VECTORP (eol_type))
5653     {
5654       int eol_seen = EOL_SEEN_NONE;
5655
5656       for (p = pbeg; p < pend; p++)
5657         {
5658           if (*p == '\n')
5659             eol_seen |= EOL_SEEN_LF;
5660           else if (*p == '\r')
5661             {
5662               if (p + 1 < pend && *(p + 1) == '\n')
5663                 {
5664                   eol_seen |= EOL_SEEN_CRLF;
5665                   p++;
5666                 }
5667               else
5668                 eol_seen |= EOL_SEEN_CR;
5669             }
5670         }
5671       if (eol_seen != EOL_SEEN_NONE
5672           && eol_seen != EOL_SEEN_LF
5673           && eol_seen != EOL_SEEN_CRLF
5674           && eol_seen != EOL_SEEN_CR)
5675         eol_seen = EOL_SEEN_LF;
5676       if (eol_seen != EOL_SEEN_NONE)
5677         eol_type = adjust_coding_eol_type (coding, eol_seen);
5678     }
5679
5680   if (EQ (eol_type, Qmac))
5681     {
5682       for (p = pbeg; p < pend; p++)
5683         if (*p == '\r')
5684           *p = '\n';
5685     }
5686   else if (EQ (eol_type, Qdos))
5687     {
5688       int n = 0;
5689
5690       if (NILP (coding->dst_object))
5691         {
5692           /* Start deleting '\r' from the tail to minimize the memory
5693              movement.  */
5694           for (p = pend - 2; p >= pbeg; p--)
5695             if (*p == '\r')
5696               {
5697                 safe_bcopy ((char *) (p + 1), (char *) p, pend-- - p - 1);
5698                 n++;
5699               }
5700         }
5701       else
5702         {
5703           int pos_byte = coding->dst_pos_byte;
5704           int pos = coding->dst_pos;
5705           int pos_end = pos + coding->produced_char - 1;
5706
5707           while (pos < pos_end)
5708             {
5709               p = BYTE_POS_ADDR (pos_byte);
5710               if (*p == '\r' && p[1] == '\n')
5711                 {
5712                   del_range_2 (pos, pos_byte, pos + 1, pos_byte + 1, 0);
5713                   n++;
5714                   pos_end--;
5715                 }
5716               pos++;
5717               pos_byte += BYTES_BY_CHAR_HEAD (*p);
5718             }
5719         }
5720       coding->produced -= n;
5721       coding->produced_char -= n;
5722     }
5723 }
5724
5725
5726 /* Return a translation table (or list of them) from coding system
5727    attribute vector ATTRS for encoding (ENCODEP is nonzero) or
5728    decoding (ENCODEP is zero). */
5729
5730 static Lisp_Object
5731 get_translation_table (attrs, encodep, max_lookup)
5732      Lisp_Object attrs;
5733      int encodep, *max_lookup;
5734 {
5735   Lisp_Object standard, translation_table;
5736   Lisp_Object val;
5737
5738   if (encodep)
5739     translation_table = CODING_ATTR_ENCODE_TBL (attrs),
5740       standard = Vstandard_translation_table_for_encode;
5741   else
5742     translation_table = CODING_ATTR_DECODE_TBL (attrs),
5743       standard = Vstandard_translation_table_for_decode;
5744   if (NILP (translation_table))
5745     translation_table = standard;
5746   else
5747     {
5748       if (SYMBOLP (translation_table))
5749         translation_table = Fget (translation_table, Qtranslation_table);
5750       else if (CONSP (translation_table))
5751         {
5752           translation_table = Fcopy_sequence (translation_table);
5753           for (val = translation_table; CONSP (val); val = XCDR (val))
5754             if (SYMBOLP (XCAR (val)))
5755               XSETCAR (val, Fget (XCAR (val), Qtranslation_table));
5756         }
5757       if (CHAR_TABLE_P (standard))
5758         {
5759           if (CONSP (translation_table))
5760             translation_table = nconc2 (translation_table,
5761                                         Fcons (standard, Qnil));
5762           else
5763             translation_table = Fcons (translation_table,
5764                                        Fcons (standard, Qnil));
5765         }
5766     }
5767
5768   if (max_lookup)
5769     {
5770       *max_lookup = 1;
5771       if (CHAR_TABLE_P (translation_table)
5772           && CHAR_TABLE_EXTRA_SLOTS (XCHAR_TABLE (translation_table)) > 1)
5773         {
5774           val = XCHAR_TABLE (translation_table)->extras[1];
5775           if (NATNUMP (val) && *max_lookup < XFASTINT (val))
5776             *max_lookup = XFASTINT (val);
5777         }
5778       else if (CONSP (translation_table))
5779         {
5780           Lisp_Object tail, val;
5781
5782           for (tail = translation_table; CONSP (tail); tail = XCDR (tail))
5783             if (CHAR_TABLE_P (XCAR (tail))
5784                 && CHAR_TABLE_EXTRA_SLOTS (XCHAR_TABLE (XCAR (tail))) > 1)
5785               {
5786                 val = XCHAR_TABLE (XCAR (tail))->extras[1];
5787                 if (NATNUMP (val) && *max_lookup < XFASTINT (val))
5788                   *max_lookup = XFASTINT (val);
5789               }
5790         }
5791     }
5792   return translation_table;
5793 }
5794
5795 #define LOOKUP_TRANSLATION_TABLE(table, c, trans)               \
5796   do {                                                          \
5797     trans = Qnil;                                               \
5798     if (CHAR_TABLE_P (table))                                   \
5799       {                                                         \
5800         trans = CHAR_TABLE_REF (table, c);                      \
5801         if (CHARACTERP (trans))                                 \
5802           c = XFASTINT (trans), trans = Qnil;                   \
5803       }                                                         \
5804     else if (CONSP (table))                                     \
5805       {                                                         \
5806         Lisp_Object tail;                                       \
5807                                                                 \
5808         for (tail = table; CONSP (tail); tail = XCDR (tail))    \
5809           if (CHAR_TABLE_P (XCAR (tail)))                       \
5810             {                                                   \
5811               trans = CHAR_TABLE_REF (XCAR (tail), c);          \
5812               if (CHARACTERP (trans))                           \
5813                 c = XFASTINT (trans), trans = Qnil;             \
5814               else if (! NILP (trans))                          \
5815                 break;                                          \
5816             }                                                   \
5817       }                                                         \
5818   } while (0)
5819
5820
5821 static Lisp_Object
5822 get_translation (val, buf, buf_end, last_block, from_nchars, to_nchars)
5823      Lisp_Object val;
5824      int *buf, *buf_end;
5825      int last_block;
5826      int *from_nchars, *to_nchars;
5827 {
5828   /* VAL is TO or (([FROM-CHAR ...] .  TO) ...) where TO is TO-CHAR or
5829      [TO-CHAR ...].  */
5830   if (CONSP (val))
5831     {
5832       Lisp_Object from, tail;
5833       int i, len;
5834
5835       for (tail = val; CONSP (tail); tail = XCDR (tail))
5836         {
5837           val = XCAR (tail);
5838           from = XCAR (val);
5839           len = ASIZE (from);
5840           for (i = 0; i < len; i++)
5841             {
5842               if (buf + i == buf_end)
5843                 {
5844                   if (! last_block)
5845                     return Qt;
5846                   break;
5847                 }
5848               if (XINT (AREF (from, i)) != buf[i])
5849                 break;
5850             }
5851           if (i == len)
5852             {
5853               val = XCDR (val);
5854               *from_nchars = len;
5855               break;
5856             }
5857         }
5858       if (! CONSP (tail))
5859         return Qnil;
5860     }
5861   if (VECTORP (val))
5862     *buf = XINT (AREF (val, 0)), *to_nchars = ASIZE (val);
5863   else
5864     *buf = XINT (val);
5865   return val;
5866 }
5867
5868
5869 static int
5870 produce_chars (coding, translation_table, last_block)
5871      struct coding_system *coding;
5872      Lisp_Object translation_table;
5873      int last_block;
5874 {
5875   unsigned char *dst = coding->destination + coding->produced;
5876   unsigned char *dst_end = coding->destination + coding->dst_bytes;
5877   int produced;
5878   int produced_chars = 0;
5879   int carryover = 0;
5880
5881   if (! coding->chars_at_source)
5882     {
5883       /* Characters are in coding->charbuf.  */
5884       int *buf = coding->charbuf;
5885       int *buf_end = buf + coding->charbuf_used;
5886
5887       if (BUFFERP (coding->src_object)
5888           && EQ (coding->src_object, coding->dst_object))
5889         dst_end = ((unsigned char *) coding->source) + coding->consumed;
5890
5891       while (buf < buf_end)
5892         {
5893           int c = *buf, i;
5894
5895           if (c >= 0)
5896             {
5897               int from_nchars = 1, to_nchars = 1;
5898               Lisp_Object trans = Qnil;
5899
5900               LOOKUP_TRANSLATION_TABLE (translation_table, c, trans);
5901               if (! NILP (trans))
5902                 {
5903                   trans = get_translation (trans, buf, buf_end, last_block,
5904                                            &from_nchars, &to_nchars);
5905                   if (EQ (trans, Qt))
5906                     break;
5907                   c = *buf;
5908                 }
5909
5910               if (dst + MAX_MULTIBYTE_LENGTH * to_nchars > dst_end)
5911                 {
5912                   dst = alloc_destination (coding,
5913                                            buf_end - buf
5914                                            + MAX_MULTIBYTE_LENGTH * to_nchars,
5915                                            dst);
5916                   dst_end = coding->destination + coding->dst_bytes;
5917                 }
5918
5919               for (i = 0; i < to_nchars; i++)
5920                 {
5921                   if (i > 0)
5922                     c = XINT (AREF (trans, i));
5923                   if (coding->dst_multibyte
5924                       || ! CHAR_BYTE8_P (c))
5925                     CHAR_STRING_ADVANCE (c, dst);
5926                   else
5927                     *dst++ = CHAR_TO_BYTE8 (c);
5928                 }
5929               produced_chars += to_nchars;
5930               *buf++ = to_nchars;
5931               while (--from_nchars > 0)
5932                 *buf++ = 0;
5933             }
5934           else
5935             /* This is an annotation datum.  (-C) is the length.  */
5936             buf += -c;
5937         }
5938       carryover = buf_end - buf;
5939     }
5940   else
5941     {
5942       const unsigned char *src = coding->source;
5943       const unsigned char *src_end = src + coding->src_bytes;
5944       Lisp_Object eol_type;
5945
5946       eol_type = CODING_ID_EOL_TYPE (coding->id);
5947
5948       if (coding->src_multibyte != coding->dst_multibyte)
5949         {
5950           if (coding->src_multibyte)
5951             {
5952               int multibytep = 1;
5953               int consumed_chars;
5954
5955               while (1)
5956                 {
5957                   const unsigned char *src_base = src;
5958                   int c;
5959
5960                   ONE_MORE_BYTE (c);
5961                   if (c == '\r')
5962                     {
5963                       if (EQ (eol_type, Qdos))
5964                         {
5965                           if (src == src_end)
5966                             {
5967                               record_conversion_result
5968                                 (coding, CODING_RESULT_INSUFFICIENT_SRC);
5969                               goto no_more_source;
5970                             }
5971                           if (*src == '\n')
5972                             c = *src++;
5973                         }
5974                       else if (EQ (eol_type, Qmac))
5975                         c = '\n';
5976                     }
5977                   if (dst == dst_end)
5978                     {
5979                       coding->consumed = src - coding->source;
5980
5981                     if (EQ (coding->src_object, coding->dst_object))
5982                       dst_end = (unsigned char *) src;
5983                     if (dst == dst_end)
5984                       {
5985                         dst = alloc_destination (coding, src_end - src + 1,
5986                                                  dst);
5987                         dst_end = coding->destination + coding->dst_bytes;
5988                         coding_set_source (coding);
5989                         src = coding->source + coding->consumed;
5990                         src_end = coding->source + coding->src_bytes;
5991                       }
5992                     }
5993                   *dst++ = c;
5994                   produced_chars++;
5995                 }
5996             no_more_source:
5997               ;
5998             }
5999           else
6000             while (src < src_end)
6001               {
6002                 int multibytep = 1;
6003                 int c = *src++;
6004
6005                 if (c == '\r')
6006                   {
6007                     if (EQ (eol_type, Qdos))
6008                       {
6009                         if (src < src_end
6010                             && *src == '\n')
6011                           c = *src++;
6012                       }
6013                     else if (EQ (eol_type, Qmac))
6014                       c = '\n';
6015                   }
6016                 if (dst >= dst_end - 1)
6017                   {
6018                     coding->consumed = src - coding->source;
6019
6020                     if (EQ (coding->src_object, coding->dst_object))
6021                       dst_end = (unsigned char *) src;
6022                     if (dst >= dst_end - 1)
6023                       {
6024                         dst = alloc_destination (coding, src_end - src + 2,
6025                                                  dst);
6026                         dst_end = coding->destination + coding->dst_bytes;
6027                         coding_set_source (coding);
6028                         src = coding->source + coding->consumed;
6029                         src_end = coding->source + coding->src_bytes;
6030                       }
6031                   }
6032                 EMIT_ONE_BYTE (c);
6033               }
6034         }
6035       else
6036         {
6037           if (!EQ (coding->src_object, coding->dst_object))
6038             {
6039               int require = coding->src_bytes - coding->dst_bytes;
6040
6041               if (require > 0)
6042                 {
6043                   EMACS_INT offset = src - coding->source;
6044
6045                   dst = alloc_destination (coding, require, dst);
6046                   coding_set_source (coding);
6047                   src = coding->source + offset;
6048                   src_end = coding->source + coding->src_bytes;
6049                 }
6050             }
6051           produced_chars = coding->src_chars;
6052           while (src < src_end)
6053             {
6054               int c = *src++;
6055
6056               if (c == '\r')
6057                 {
6058                   if (EQ (eol_type, Qdos))
6059                     {
6060                       if (src < src_end
6061                           && *src == '\n')
6062                         c = *src++;
6063                       produced_chars--;
6064                     }
6065                   else if (EQ (eol_type, Qmac))
6066                     c = '\n';
6067                 }
6068               *dst++ = c;
6069             }
6070         }
6071       coding->consumed = coding->src_bytes;
6072       coding->consumed_char = coding->src_chars;
6073     }
6074
6075   produced = dst - (coding->destination + coding->produced);
6076   if (BUFFERP (coding->dst_object))
6077     insert_from_gap (produced_chars, produced);
6078   coding->produced += produced;
6079   coding->produced_char += produced_chars;
6080   return carryover;
6081 }
6082
6083 /* Compose text in CODING->object according to the annotation data at
6084    CHARBUF.  CHARBUF is an array:
6085      [ -LENGTH ANNOTATION_MASK FROM TO METHOD COMP_LEN [ COMPONENTS... ] ]
6086  */
6087
6088 static INLINE void
6089 produce_composition (coding, charbuf, pos)
6090      struct coding_system *coding;
6091      int *charbuf;
6092      EMACS_INT pos;
6093 {
6094   int len;
6095   EMACS_INT to;
6096   enum composition_method method;
6097   Lisp_Object components;
6098
6099   len = -charbuf[0];
6100   to = pos + charbuf[2];
6101   if (to <= pos)
6102     return;
6103   method = (enum composition_method) (charbuf[3]);
6104
6105   if (method == COMPOSITION_RELATIVE)
6106     components = Qnil;
6107   else if (method >= COMPOSITION_WITH_RULE
6108            && method <= COMPOSITION_WITH_RULE_ALTCHARS)
6109     {
6110       Lisp_Object args[MAX_COMPOSITION_COMPONENTS * 2 - 1];
6111       int i;
6112
6113       len -= 4;
6114       charbuf += 4;
6115       for (i = 0; i < len; i++)
6116         {
6117           args[i] = make_number (charbuf[i]);
6118           if (charbuf[i] < 0)
6119             return;
6120         }
6121       components = (method == COMPOSITION_WITH_ALTCHARS
6122                     ? Fstring (len, args) : Fvector (len, args));
6123     }
6124   else
6125     return;
6126   compose_text (pos, to, components, Qnil, coding->dst_object);
6127 }
6128
6129
6130 /* Put `charset' property on text in CODING->object according to
6131    the annotation data at CHARBUF.  CHARBUF is an array:
6132      [ -LENGTH ANNOTATION_MASK NCHARS CHARSET-ID ]
6133  */
6134
6135 static INLINE void
6136 produce_charset (coding, charbuf, pos)
6137      struct coding_system *coding;
6138      int *charbuf;
6139      EMACS_INT pos;
6140 {
6141   EMACS_INT from = pos - charbuf[2];
6142   struct charset *charset = CHARSET_FROM_ID (charbuf[3]);
6143
6144   Fput_text_property (make_number (from), make_number (pos),
6145                       Qcharset, CHARSET_NAME (charset),
6146                       coding->dst_object);
6147 }
6148
6149
6150 #define CHARBUF_SIZE 0x4000
6151
6152 #define ALLOC_CONVERSION_WORK_AREA(coding)                              \
6153   do {                                                                  \
6154     int size = CHARBUF_SIZE;;                                           \
6155                                                                         \
6156     coding->charbuf = NULL;                                             \
6157     while (size > 1024)                                                 \
6158       {                                                                 \
6159         coding->charbuf = (int *) alloca (sizeof (int) * size);         \
6160         if (coding->charbuf)                                            \
6161           break;                                                        \
6162         size >>= 1;                                                     \
6163       }                                                                 \
6164     if (! coding->charbuf)                                              \
6165       {                                                                 \
6166         record_conversion_result (coding, CODING_RESULT_INSUFFICIENT_MEM); \
6167         return coding->result;                                          \
6168       }                                                                 \
6169     coding->charbuf_size = size;                                        \
6170   } while (0)
6171
6172
6173 static void
6174 produce_annotation (coding, pos)
6175      struct coding_system *coding;
6176      EMACS_INT pos;
6177 {
6178   int *charbuf = coding->charbuf;
6179   int *charbuf_end = charbuf + coding->charbuf_used;
6180
6181   if (NILP (coding->dst_object))
6182     return;
6183
6184   while (charbuf < charbuf_end)
6185     {
6186       if (*charbuf >= 0)
6187         pos += *charbuf++;
6188       else
6189         {
6190           int len = -*charbuf;
6191           switch (charbuf[1])
6192             {
6193             case CODING_ANNOTATE_COMPOSITION_MASK:
6194               produce_composition (coding, charbuf, pos);
6195               break;
6196             case CODING_ANNOTATE_CHARSET_MASK:
6197               produce_charset (coding, charbuf, pos);
6198               break;
6199             default:
6200               abort ();
6201             }
6202           charbuf += len;
6203         }
6204     }
6205 }
6206
6207 /* Decode the data at CODING->src_object into CODING->dst_object.
6208    CODING->src_object is a buffer, a string, or nil.
6209    CODING->dst_object is a buffer.
6210
6211    If CODING->src_object is a buffer, it must be the current buffer.
6212    In this case, if CODING->src_pos is positive, it is a position of
6213    the source text in the buffer, otherwise, the source text is in the
6214    gap area of the buffer, and CODING->src_pos specifies the offset of
6215    the text from GPT (which must be the same as PT).  If this is the
6216    same buffer as CODING->dst_object, CODING->src_pos must be
6217    negative.
6218
6219    If CODING->src_object is a string, CODING->src_pos is an index to
6220    that string.
6221
6222    If CODING->src_object is nil, CODING->source must already point to
6223    the non-relocatable memory area.  In this case, CODING->src_pos is
6224    an offset from CODING->source.
6225
6226    The decoded data is inserted at the current point of the buffer
6227    CODING->dst_object.
6228 */
6229
6230 static int
6231 decode_coding (coding)
6232      struct coding_system *coding;
6233 {
6234   Lisp_Object attrs;
6235   Lisp_Object undo_list;
6236   Lisp_Object translation_table;
6237   int carryover;
6238   int i;
6239
6240   if (BUFFERP (coding->src_object)
6241       && coding->src_pos > 0
6242       && coding->src_pos < GPT
6243       && coding->src_pos + coding->src_chars > GPT)
6244     move_gap_both (coding->src_pos, coding->src_pos_byte);
6245
6246   undo_list = Qt;
6247   if (BUFFERP (coding->dst_object))
6248     {
6249       if (current_buffer != XBUFFER (coding->dst_object))
6250         set_buffer_internal (XBUFFER (coding->dst_object));
6251       if (GPT != PT)
6252         move_gap_both (PT, PT_BYTE);
6253       undo_list = current_buffer->undo_list;
6254       current_buffer->undo_list = Qt;
6255     }
6256
6257   coding->consumed = coding->consumed_char = 0;
6258   coding->produced = coding->produced_char = 0;
6259   coding->chars_at_source = 0;
6260   record_conversion_result (coding, CODING_RESULT_SUCCESS);
6261   coding->errors = 0;
6262
6263   ALLOC_CONVERSION_WORK_AREA (coding);
6264
6265   attrs = CODING_ID_ATTRS (coding->id);
6266   translation_table = get_translation_table (attrs, 0, NULL);
6267
6268   carryover = 0;
6269   do
6270     {
6271       EMACS_INT pos = coding->dst_pos + coding->produced_char;
6272
6273       coding_set_source (coding);
6274       coding->annotated = 0;
6275       coding->charbuf_used = carryover;
6276       (*(coding->decoder)) (coding);
6277       coding_set_destination (coding);
6278       carryover = produce_chars (coding, translation_table, 0);
6279       if (coding->annotated)
6280         produce_annotation (coding, pos);
6281       for (i = 0; i < carryover; i++)
6282         coding->charbuf[i]
6283           = coding->charbuf[coding->charbuf_used - carryover + i];
6284     }
6285   while (coding->consumed < coding->src_bytes
6286          && (coding->result == CODING_RESULT_SUCCESS
6287              || coding->result == CODING_RESULT_INVALID_SRC));
6288
6289   if (carryover > 0)
6290     {
6291       coding_set_destination (coding);
6292       coding->charbuf_used = carryover;
6293       produce_chars (coding, translation_table, 1);
6294     }
6295
6296   coding->carryover_bytes = 0;
6297   if (coding->consumed < coding->src_bytes)
6298     {
6299       int nbytes = coding->src_bytes - coding->consumed;
6300       const unsigned char *src;
6301
6302       coding_set_source (coding);
6303       coding_set_destination (coding);
6304       src = coding->source + coding->consumed;
6305
6306       if (coding->mode & CODING_MODE_LAST_BLOCK)
6307         {
6308           /* Flush out unprocessed data as binary chars.  We are sure
6309              that the number of data is less than the size of
6310              coding->charbuf.  */
6311           coding->charbuf_used = 0;
6312           while (nbytes-- > 0)
6313             {
6314               int c = *src++;
6315
6316               if (c & 0x80)
6317                 c = BYTE8_TO_CHAR (c);
6318               coding->charbuf[coding->charbuf_used++] = c;
6319             }
6320           produce_chars (coding, Qnil, 1);
6321         }
6322       else
6323         {
6324           /* Record unprocessed bytes in coding->carryover.  We are
6325              sure that the number of data is less than the size of
6326              coding->carryover.  */
6327           unsigned char *p = coding->carryover;
6328
6329           coding->carryover_bytes = nbytes;
6330           while (nbytes-- > 0)
6331             *p++ = *src++;
6332         }
6333       coding->consumed = coding->src_bytes;
6334     }
6335
6336   if (! EQ (CODING_ID_EOL_TYPE (coding->id), Qunix))
6337     decode_eol (coding);
6338   if (BUFFERP (coding->dst_object))
6339     {
6340       current_buffer->undo_list = undo_list;
6341       record_insert (coding->dst_pos, coding->produced_char);
6342     }
6343   return coding->result;
6344 }
6345
6346
6347 /* Extract an annotation datum from a composition starting at POS and
6348    ending before LIMIT of CODING->src_object (buffer or string), store
6349    the data in BUF, set *STOP to a starting position of the next
6350    composition (if any) or to LIMIT, and return the address of the
6351    next element of BUF.
6352
6353    If such an annotation is not found, set *STOP to a starting
6354    position of a composition after POS (if any) or to LIMIT, and
6355    return BUF.  */
6356
6357 static INLINE int *
6358 handle_composition_annotation (pos, limit, coding, buf, stop)
6359      EMACS_INT pos, limit;
6360      struct coding_system *coding;
6361      int *buf;
6362      EMACS_INT *stop;
6363 {
6364   EMACS_INT start, end;
6365   Lisp_Object prop;
6366
6367   if (! find_composition (pos, limit, &start, &end, &prop, coding->src_object)
6368       || end > limit)
6369     *stop = limit;
6370   else if (start > pos)
6371     *stop = start;
6372   else
6373     {
6374       if (start == pos)
6375         {
6376           /* We found a composition.  Store the corresponding
6377              annotation data in BUF.  */
6378           int *head = buf;
6379           enum composition_method method = COMPOSITION_METHOD (prop);
6380           int nchars = COMPOSITION_LENGTH (prop);
6381
6382           ADD_COMPOSITION_DATA (buf, nchars, method);
6383           if (method != COMPOSITION_RELATIVE)
6384             {
6385               Lisp_Object components;
6386               int len, i, i_byte;
6387
6388               components = COMPOSITION_COMPONENTS (prop);
6389               if (VECTORP (components))
6390                 {
6391                   len = XVECTOR (components)->size;
6392                   for (i = 0; i < len; i++)
6393                     *buf++ = XINT (AREF (components, i));
6394                 }
6395               else if (STRINGP (components))
6396                 {
6397                   len = SCHARS (components);
6398                   i = i_byte = 0;
6399                   while (i < len)
6400                     {
6401                       FETCH_STRING_CHAR_ADVANCE (*buf, components, i, i_byte);
6402                       buf++;
6403                     }
6404                 }
6405               else if (INTEGERP (components))
6406                 {
6407                   len = 1;
6408                   *buf++ = XINT (components);
6409                 }
6410               else if (CONSP (components))
6411                 {
6412                   for (len = 0; CONSP (components);
6413                        len++, components = XCDR (components))
6414                     *buf++ = XINT (XCAR (components));
6415                 }
6416               else
6417                 abort ();
6418               *head -= len;
6419             }
6420         }
6421
6422       if (find_composition (end, limit, &start, &end, &prop,
6423                             coding->src_object)
6424           && end <= limit)
6425         *stop = start;
6426       else
6427         *stop = limit;
6428     }
6429   return buf;
6430 }
6431
6432
6433 /* Extract an annotation datum from a text property `charset' at POS of
6434    CODING->src_object (buffer of string), store the data in BUF, set
6435    *STOP to the position where the value of `charset' property changes
6436    (limiting by LIMIT), and return the address of the next element of
6437    BUF.
6438
6439    If the property value is nil, set *STOP to the position where the
6440    property value is non-nil (limiting by LIMIT), and return BUF.  */
6441
6442 static INLINE int *
6443 handle_charset_annotation (pos, limit, coding, buf, stop)
6444      EMACS_INT pos, limit;
6445      struct coding_system *coding;
6446      int *buf;
6447      EMACS_INT *stop;
6448 {
6449   Lisp_Object val, next;
6450   int id;
6451
6452   val = Fget_text_property (make_number (pos), Qcharset, coding->src_object);
6453   if (! NILP (val) && CHARSETP (val))
6454     id = XINT (CHARSET_SYMBOL_ID (val));
6455   else
6456     id = -1;
6457   ADD_CHARSET_DATA (buf, 0, id);
6458   next = Fnext_single_property_change (make_number (pos), Qcharset,
6459                                        coding->src_object,
6460                                        make_number (limit));
6461   *stop = XINT (next);
6462   return buf;
6463 }
6464
6465
6466 static void
6467 consume_chars (coding, translation_table, max_lookup)
6468      struct coding_system *coding;
6469      Lisp_Object translation_table;
6470      int max_lookup;
6471 {
6472   int *buf = coding->charbuf;
6473   int *buf_end = coding->charbuf + coding->charbuf_size;
6474   const unsigned char *src = coding->source + coding->consumed;
6475   const unsigned char *src_end = coding->source + coding->src_bytes;
6476   EMACS_INT pos = coding->src_pos + coding->consumed_char;
6477   EMACS_INT end_pos = coding->src_pos + coding->src_chars;
6478   int multibytep = coding->src_multibyte;
6479   Lisp_Object eol_type;
6480   int c;
6481   EMACS_INT stop, stop_composition, stop_charset;
6482   int *lookup_buf = NULL;
6483
6484   if (! NILP (translation_table))
6485     lookup_buf = alloca (sizeof (int) * max_lookup);
6486
6487   eol_type = CODING_ID_EOL_TYPE (coding->id);
6488   if (VECTORP (eol_type))
6489     eol_type = Qunix;
6490
6491   /* Note: composition handling is not yet implemented.  */
6492   coding->common_flags &= ~CODING_ANNOTATE_COMPOSITION_MASK;
6493
6494   if (NILP (coding->src_object))
6495     stop = stop_composition = stop_charset = end_pos;
6496   else
6497     {
6498       if (coding->common_flags & CODING_ANNOTATE_COMPOSITION_MASK)
6499         stop = stop_composition = pos;
6500       else
6501         stop = stop_composition = end_pos;
6502       if (coding->common_flags & CODING_ANNOTATE_CHARSET_MASK)
6503         stop = stop_charset = pos;
6504       else
6505         stop_charset = end_pos;
6506     }
6507
6508   /* Compensate for CRLF and conversion.  */
6509   buf_end -= 1 + MAX_ANNOTATION_LENGTH;
6510   while (buf < buf_end)
6511     {
6512       Lisp_Object trans;
6513
6514       if (pos == stop)
6515         {
6516           if (pos == end_pos)
6517             break;
6518           if (pos == stop_composition)
6519             buf = handle_composition_annotation (pos, end_pos, coding,
6520                                                  buf, &stop_composition);
6521           if (pos == stop_charset)
6522             buf = handle_charset_annotation (pos, end_pos, coding,
6523                                              buf, &stop_charset);
6524           stop = (stop_composition < stop_charset
6525                   ? stop_composition : stop_charset);
6526         }
6527
6528       if (! multibytep)
6529         {
6530           EMACS_INT bytes;
6531
6532           if (coding->encoder == encode_coding_raw_text)
6533             c = *src++, pos++;
6534           else if ((bytes = MULTIBYTE_LENGTH (src, src_end)) > 0)
6535             c = STRING_CHAR_ADVANCE (src), pos += bytes;
6536           else
6537             c = BYTE8_TO_CHAR (*src), src++, pos++;
6538         }
6539       else
6540         c = STRING_CHAR_ADVANCE (src), pos++;
6541       if ((c == '\r') && (coding->mode & CODING_MODE_SELECTIVE_DISPLAY))
6542         c = '\n';
6543       if (! EQ (eol_type, Qunix))
6544         {
6545           if (c == '\n')
6546             {
6547               if (EQ (eol_type, Qdos))
6548                 *buf++ = '\r';
6549               else
6550                 c = '\r';
6551             }
6552         }
6553
6554       trans = Qnil;
6555       LOOKUP_TRANSLATION_TABLE (translation_table, c, trans);
6556       if (NILP (trans))
6557         *buf++ = c;
6558       else
6559         {
6560           int from_nchars = 1, to_nchars = 1;
6561           int *lookup_buf_end;
6562           const unsigned char *p = src;
6563           int i;
6564
6565           lookup_buf[0] = c;
6566           for (i = 1; i < max_lookup && p < src_end; i++)
6567             lookup_buf[i] = STRING_CHAR_ADVANCE (p);
6568           lookup_buf_end = lookup_buf + i;
6569           trans = get_translation (trans, lookup_buf, lookup_buf_end, 1,
6570                                    &from_nchars, &to_nchars);
6571           if (EQ (trans, Qt)
6572               || buf + to_nchars > buf_end)
6573             break;
6574           *buf++ = *lookup_buf;
6575           for (i = 1; i < to_nchars; i++)
6576             *buf++ = XINT (AREF (trans, i));
6577           for (i = 1; i < from_nchars; i++, pos++)
6578             src += MULTIBYTE_LENGTH_NO_CHECK (src);
6579         }
6580     }
6581
6582   coding->consumed = src - coding->source;
6583   coding->consumed_char = pos - coding->src_pos;
6584   coding->charbuf_used = buf - coding->charbuf;
6585   coding->chars_at_source = 0;
6586 }
6587
6588
6589 /* Encode the text at CODING->src_object into CODING->dst_object.
6590    CODING->src_object is a buffer or a string.
6591    CODING->dst_object is a buffer or nil.
6592
6593    If CODING->src_object is a buffer, it must be the current buffer.
6594    In this case, if CODING->src_pos is positive, it is a position of
6595    the source text in the buffer, otherwise. the source text is in the
6596    gap area of the buffer, and coding->src_pos specifies the offset of
6597    the text from GPT (which must be the same as PT).  If this is the
6598    same buffer as CODING->dst_object, CODING->src_pos must be
6599    negative and CODING should not have `pre-write-conversion'.
6600
6601    If CODING->src_object is a string, CODING should not have
6602    `pre-write-conversion'.
6603
6604    If CODING->dst_object is a buffer, the encoded data is inserted at
6605    the current point of that buffer.
6606
6607    If CODING->dst_object is nil, the encoded data is placed at the
6608    memory area specified by CODING->destination.  */
6609
6610 static int
6611 encode_coding (coding)
6612      struct coding_system *coding;
6613 {
6614   Lisp_Object attrs;
6615   Lisp_Object translation_table;
6616   int max_lookup;
6617
6618   attrs = CODING_ID_ATTRS (coding->id);
6619   if (coding->encoder == encode_coding_raw_text)
6620     translation_table = Qnil, max_lookup = 0;
6621   else
6622     translation_table = get_translation_table (attrs, 1, &max_lookup);
6623
6624   if (BUFFERP (coding->dst_object))
6625     {
6626       set_buffer_internal (XBUFFER (coding->dst_object));
6627       coding->dst_multibyte
6628         = ! NILP (current_buffer->enable_multibyte_characters);
6629     }
6630
6631   coding->consumed = coding->consumed_char = 0;
6632   coding->produced = coding->produced_char = 0;
6633   record_conversion_result (coding, CODING_RESULT_SUCCESS);
6634   coding->errors = 0;
6635
6636   ALLOC_CONVERSION_WORK_AREA (coding);
6637
6638   do {
6639     coding_set_source (coding);
6640     consume_chars (coding, translation_table, max_lookup);
6641     coding_set_destination (coding);
6642     (*(coding->encoder)) (coding);
6643   } while (coding->consumed_char < coding->src_chars);
6644
6645   if (BUFFERP (coding->dst_object))
6646     insert_from_gap (coding->produced_char, coding->produced);
6647
6648   return (coding->result);
6649 }
6650
6651
6652 /* Name (or base name) of work buffer for code conversion.  */
6653 static Lisp_Object Vcode_conversion_workbuf_name;
6654
6655 /* A working buffer used by the top level conversion.  Once it is
6656    created, it is never destroyed.  It has the name
6657    Vcode_conversion_workbuf_name.  The other working buffers are
6658    destroyed after the use is finished, and their names are modified
6659    versions of Vcode_conversion_workbuf_name.  */
6660 static Lisp_Object Vcode_conversion_reused_workbuf;
6661
6662 /* 1 iff Vcode_conversion_reused_workbuf is already in use.  */
6663 static int reused_workbuf_in_use;
6664
6665
6666 /* Return a working buffer of code convesion.  MULTIBYTE specifies the
6667    multibyteness of returning buffer.  */
6668
6669 static Lisp_Object
6670 make_conversion_work_buffer (multibyte)
6671      int multibyte;
6672 {
6673   Lisp_Object name, workbuf;
6674   struct buffer *current;
6675
6676   if (reused_workbuf_in_use++)
6677     {
6678       name = Fgenerate_new_buffer_name (Vcode_conversion_workbuf_name, Qnil);
6679       workbuf = Fget_buffer_create (name);
6680     }
6681   else
6682     {
6683       name = Vcode_conversion_workbuf_name;
6684       workbuf = Fget_buffer_create (name);
6685       if (NILP (Vcode_conversion_reused_workbuf))
6686         Vcode_conversion_reused_workbuf = workbuf;
6687     }
6688   current = current_buffer;
6689   set_buffer_internal (XBUFFER (workbuf));
6690   Ferase_buffer ();
6691   current_buffer->undo_list = Qt;
6692   current_buffer->enable_multibyte_characters = multibyte ? Qt : Qnil;
6693   set_buffer_internal (current);
6694   return workbuf;
6695 }
6696
6697
6698 static Lisp_Object
6699 code_conversion_restore (arg)
6700      Lisp_Object arg;
6701 {
6702   Lisp_Object current, workbuf;
6703   struct gcpro gcpro1;
6704
6705   GCPRO1 (arg);
6706   current = XCAR (arg);
6707   workbuf = XCDR (arg);
6708   if (! NILP (workbuf))
6709     {
6710       if (EQ (workbuf, Vcode_conversion_reused_workbuf))
6711         reused_workbuf_in_use = 0;
6712       else if (! NILP (Fbuffer_live_p (workbuf)))
6713         Fkill_buffer (workbuf);
6714     }
6715   set_buffer_internal (XBUFFER (current));
6716   UNGCPRO;
6717   return Qnil;
6718 }
6719
6720 Lisp_Object
6721 code_conversion_save (with_work_buf, multibyte)
6722      int with_work_buf, multibyte;
6723 {
6724   Lisp_Object workbuf = Qnil;
6725
6726   if (with_work_buf)
6727     workbuf = make_conversion_work_buffer (multibyte);
6728   record_unwind_protect (code_conversion_restore,
6729                          Fcons (Fcurrent_buffer (), workbuf));
6730   return workbuf;
6731 }
6732
6733 int
6734 decode_coding_gap (coding, chars, bytes)
6735      struct coding_system *coding;
6736      EMACS_INT chars, bytes;
6737 {
6738   int count = specpdl_ptr - specpdl;
6739   Lisp_Object attrs;
6740
6741   code_conversion_save (0, 0);
6742
6743   coding->src_object = Fcurrent_buffer ();
6744   coding->src_chars = chars;
6745   coding->src_bytes = bytes;
6746   coding->src_pos = -chars;
6747   coding->src_pos_byte = -bytes;
6748   coding->src_multibyte = chars < bytes;
6749   coding->dst_object = coding->src_object;
6750   coding->dst_pos = PT;
6751   coding->dst_pos_byte = PT_BYTE;
6752   coding->dst_multibyte = ! NILP (current_buffer->enable_multibyte_characters);
6753
6754   if (CODING_REQUIRE_DETECTION (coding))
6755     detect_coding (coding);
6756
6757   coding->mode |= CODING_MODE_LAST_BLOCK;
6758   decode_coding (coding);
6759
6760   attrs = CODING_ID_ATTRS (coding->id);
6761   if (! NILP (CODING_ATTR_POST_READ (attrs)))
6762     {
6763       EMACS_INT prev_Z = Z, prev_Z_BYTE = Z_BYTE;
6764       Lisp_Object val;
6765
6766       TEMP_SET_PT_BOTH (coding->dst_pos, coding->dst_pos_byte);
6767       val = call1 (CODING_ATTR_POST_READ (attrs),
6768                    make_number (coding->produced_char));
6769       CHECK_NATNUM (val);
6770       coding->produced_char += Z - prev_Z;
6771       coding->produced += Z_BYTE - prev_Z_BYTE;
6772     }
6773
6774   unbind_to (count, Qnil);
6775   return coding->result;
6776 }
6777
6778 int
6779 encode_coding_gap (coding, chars, bytes)
6780      struct coding_system *coding;
6781      EMACS_INT chars, bytes;
6782 {
6783   int count = specpdl_ptr - specpdl;
6784
6785   code_conversion_save (0, 0);
6786
6787   coding->src_object = Fcurrent_buffer ();
6788   coding->src_chars = chars;
6789   coding->src_bytes = bytes;
6790   coding->src_pos = -chars;
6791   coding->src_pos_byte = -bytes;
6792   coding->src_multibyte = chars < bytes;
6793   coding->dst_object = coding->src_object;
6794   coding->dst_pos = PT;
6795   coding->dst_pos_byte = PT_BYTE;
6796
6797   encode_coding (coding);
6798
6799   unbind_to (count, Qnil);
6800   return coding->result;
6801 }
6802
6803
6804 /* Decode the text in the range FROM/FROM_BYTE and TO/TO_BYTE in
6805    SRC_OBJECT into DST_OBJECT by coding context CODING.
6806
6807    SRC_OBJECT is a buffer, a string, or Qnil.
6808
6809    If it is a buffer, the text is at point of the buffer.  FROM and TO
6810    are positions in the buffer.
6811
6812    If it is a string, the text is at the beginning of the string.
6813    FROM and TO are indices to the string.
6814
6815    If it is nil, the text is at coding->source.  FROM and TO are
6816    indices to coding->source.
6817
6818    DST_OBJECT is a buffer, Qt, or Qnil.
6819
6820    If it is a buffer, the decoded text is inserted at point of the
6821    buffer.  If the buffer is the same as SRC_OBJECT, the source text
6822    is deleted.
6823
6824    If it is Qt, a string is made from the decoded text, and
6825    set in CODING->dst_object.
6826
6827    If it is Qnil, the decoded text is stored at CODING->destination.
6828    The caller must allocate CODING->dst_bytes bytes at
6829    CODING->destination by xmalloc.  If the decoded text is longer than
6830    CODING->dst_bytes, CODING->destination is relocated by xrealloc.
6831  */
6832
6833 void
6834 decode_coding_object (coding, src_object, from, from_byte, to, to_byte,
6835                       dst_object)
6836      struct coding_system *coding;
6837      Lisp_Object src_object;
6838      EMACS_INT from, from_byte, to, to_byte;
6839      Lisp_Object dst_object;
6840 {
6841   int count = specpdl_ptr - specpdl;
6842   unsigned char *destination;
6843   EMACS_INT dst_bytes;
6844   EMACS_INT chars = to - from;
6845   EMACS_INT bytes = to_byte - from_byte;
6846   Lisp_Object attrs;
6847   Lisp_Object buffer;
6848   int saved_pt = -1, saved_pt_byte;
6849
6850   buffer = Fcurrent_buffer ();
6851
6852   if (NILP (dst_object))
6853     {
6854       destination = coding->destination;
6855       dst_bytes = coding->dst_bytes;
6856     }
6857
6858   coding->src_object = src_object;
6859   coding->src_chars = chars;
6860   coding->src_bytes = bytes;
6861   coding->src_multibyte = chars < bytes;
6862
6863   if (STRINGP (src_object))
6864     {
6865       coding->src_pos = from;
6866       coding->src_pos_byte = from_byte;
6867     }
6868   else if (BUFFERP (src_object))
6869     {
6870       set_buffer_internal (XBUFFER (src_object));
6871       if (from != GPT)
6872         move_gap_both (from, from_byte);
6873       if (EQ (src_object, dst_object))
6874         {
6875           saved_pt = PT, saved_pt_byte = PT_BYTE;
6876           TEMP_SET_PT_BOTH (from, from_byte);
6877           del_range_both (from, from_byte, to, to_byte, 1);
6878           coding->src_pos = -chars;
6879           coding->src_pos_byte = -bytes;
6880         }
6881       else
6882         {
6883           coding->src_pos = from;
6884           coding->src_pos_byte = from_byte;
6885         }
6886     }
6887
6888   if (CODING_REQUIRE_DETECTION (coding))
6889     detect_coding (coding);
6890   attrs = CODING_ID_ATTRS (coding->id);
6891
6892   if (EQ (dst_object, Qt)
6893       || (! NILP (CODING_ATTR_POST_READ (attrs))
6894           && NILP (dst_object)))
6895     {
6896       coding->dst_object = code_conversion_save (1, 1);
6897       coding->dst_pos = BEG;
6898       coding->dst_pos_byte = BEG_BYTE;
6899       coding->dst_multibyte = 1;
6900     }
6901   else if (BUFFERP (dst_object))
6902     {
6903       code_conversion_save (0, 0);
6904       coding->dst_object = dst_object;
6905       coding->dst_pos = BUF_PT (XBUFFER (dst_object));
6906       coding->dst_pos_byte = BUF_PT_BYTE (XBUFFER (dst_object));
6907       coding->dst_multibyte
6908         = ! NILP (XBUFFER (dst_object)->enable_multibyte_characters);
6909     }
6910   else
6911     {
6912       code_conversion_save (0, 0);
6913       coding->dst_object = Qnil;
6914       coding->dst_multibyte = 1;
6915     }
6916
6917   decode_coding (coding);
6918
6919   if (BUFFERP (coding->dst_object))
6920     set_buffer_internal (XBUFFER (coding->dst_object));
6921
6922   if (! NILP (CODING_ATTR_POST_READ (attrs)))
6923     {
6924       struct gcpro gcpro1, gcpro2;
6925       EMACS_INT prev_Z = Z, prev_Z_BYTE = Z_BYTE;
6926       Lisp_Object val;
6927
6928       TEMP_SET_PT_BOTH (coding->dst_pos, coding->dst_pos_byte);
6929       GCPRO2 (coding->src_object, coding->dst_object);
6930       val = safe_call1 (CODING_ATTR_POST_READ (attrs),
6931                         make_number (coding->produced_char));
6932       UNGCPRO;
6933       CHECK_NATNUM (val);
6934       coding->produced_char += Z - prev_Z;
6935       coding->produced += Z_BYTE - prev_Z_BYTE;
6936     }
6937
6938   if (EQ (dst_object, Qt))
6939     {
6940       coding->dst_object = Fbuffer_string ();
6941     }
6942   else if (NILP (dst_object) && BUFFERP (coding->dst_object))
6943     {
6944       set_buffer_internal (XBUFFER (coding->dst_object));
6945       if (dst_bytes < coding->produced)
6946         {
6947           destination
6948             = (unsigned char *) xrealloc (destination, coding->produced);
6949           if (! destination)
6950             {
6951               record_conversion_result (coding,
6952                                         CODING_RESULT_INSUFFICIENT_DST);
6953               unbind_to (count, Qnil);
6954               return;
6955             }
6956           if (BEGV < GPT && GPT < BEGV + coding->produced_char)
6957             move_gap_both (BEGV, BEGV_BYTE);
6958           bcopy (BEGV_ADDR, destination, coding->produced);
6959           coding->destination = destination;
6960         }
6961     }
6962
6963   if (saved_pt >= 0)
6964     {
6965       /* This is the case of:
6966          (BUFFERP (src_object) && EQ (src_object, dst_object))
6967          As we have moved PT while replacing the original buffer
6968          contents, we must recover it now.  */
6969       set_buffer_internal (XBUFFER (src_object));
6970       if (saved_pt < from)
6971         TEMP_SET_PT_BOTH (saved_pt, saved_pt_byte);
6972       else if (saved_pt < from + chars)
6973         TEMP_SET_PT_BOTH (from, from_byte);
6974       else if (! NILP (current_buffer->enable_multibyte_characters))
6975         TEMP_SET_PT_BOTH (saved_pt + (coding->produced_char - chars),
6976                           saved_pt_byte + (coding->produced - bytes));
6977       else
6978         TEMP_SET_PT_BOTH (saved_pt + (coding->produced - bytes),
6979                           saved_pt_byte + (coding->produced - bytes));
6980     }
6981
6982   unbind_to (count, coding->dst_object);
6983 }
6984
6985
6986 void
6987 encode_coding_object (coding, src_object, from, from_byte, to, to_byte,
6988                       dst_object)
6989      struct coding_system *coding;
6990      Lisp_Object src_object;
6991      EMACS_INT from, from_byte, to, to_byte;
6992      Lisp_Object dst_object;
6993 {
6994   int count = specpdl_ptr - specpdl;
6995   EMACS_INT chars = to - from;
6996   EMACS_INT bytes = to_byte - from_byte;
6997   Lisp_Object attrs;
6998   Lisp_Object buffer;
6999   int saved_pt = -1, saved_pt_byte;
7000   int kill_src_buffer = 0;
7001
7002   buffer = Fcurrent_buffer ();
7003
7004   coding->src_object = src_object;
7005   coding->src_chars = chars;
7006   coding->src_bytes = bytes;
7007   coding->src_multibyte = chars < bytes;
7008
7009   attrs = CODING_ID_ATTRS (coding->id);
7010
7011   if (! NILP (CODING_ATTR_PRE_WRITE (attrs)))
7012     {
7013       coding->src_object = code_conversion_save (1, coding->src_multibyte);
7014       set_buffer_internal (XBUFFER (coding->src_object));
7015       if (STRINGP (src_object))
7016         insert_from_string (src_object, from, from_byte, chars, bytes, 0);
7017       else if (BUFFERP (src_object))
7018         insert_from_buffer (XBUFFER (src_object), from, chars, 0);
7019       else
7020         insert_1_both (coding->source + from, chars, bytes, 0, 0, 0);
7021
7022       if (EQ (src_object, dst_object))
7023         {
7024           set_buffer_internal (XBUFFER (src_object));
7025           saved_pt = PT, saved_pt_byte = PT_BYTE;
7026           del_range_both (from, from_byte, to, to_byte, 1);
7027           set_buffer_internal (XBUFFER (coding->src_object));
7028         }
7029
7030       {
7031         Lisp_Object args[3];
7032
7033         args[0] = CODING_ATTR_PRE_WRITE (attrs);
7034         args[1] = make_number (BEG);
7035         args[2] = make_number (Z);
7036         safe_call (3, args);
7037       }
7038       if (XBUFFER (coding->src_object) != current_buffer)
7039         kill_src_buffer = 1;
7040       coding->src_object = Fcurrent_buffer ();
7041       if (BEG != GPT)
7042         move_gap_both (BEG, BEG_BYTE);
7043       coding->src_chars = Z - BEG;
7044       coding->src_bytes = Z_BYTE - BEG_BYTE;
7045       coding->src_pos = BEG;
7046       coding->src_pos_byte = BEG_BYTE;
7047       coding->src_multibyte = Z < Z_BYTE;
7048     }
7049   else if (STRINGP (src_object))
7050     {
7051       code_conversion_save (0, 0);
7052       coding->src_pos = from;
7053       coding->src_pos_byte = from_byte;
7054     }
7055   else if (BUFFERP (src_object))
7056     {
7057       code_conversion_save (0, 0);
7058       set_buffer_internal (XBUFFER (src_object));
7059       if (EQ (src_object, dst_object))
7060         {
7061           saved_pt = PT, saved_pt_byte = PT_BYTE;
7062           coding->src_object = del_range_1 (from, to, 1, 1);
7063           coding->src_pos = 0;
7064           coding->src_pos_byte = 0;
7065         }
7066       else
7067         {
7068           if (from < GPT && to >= GPT)
7069             move_gap_both (from, from_byte);
7070           coding->src_pos = from;
7071           coding->src_pos_byte = from_byte;
7072         }
7073     }
7074   else
7075     code_conversion_save (0, 0);
7076
7077   if (BUFFERP (dst_object))
7078     {
7079       coding->dst_object = dst_object;
7080       if (EQ (src_object, dst_object))
7081         {
7082           coding->dst_pos = from;
7083           coding->dst_pos_byte = from_byte;
7084         }
7085       else
7086         {
7087           coding->dst_pos = BUF_PT (XBUFFER (dst_object));
7088           coding->dst_pos_byte = BUF_PT_BYTE (XBUFFER (dst_object));
7089         }
7090       coding->dst_multibyte
7091         = ! NILP (XBUFFER (dst_object)->enable_multibyte_characters);
7092     }
7093   else if (EQ (dst_object, Qt))
7094     {
7095       coding->dst_object = Qnil;
7096       coding->dst_bytes = coding->src_chars;
7097       if (coding->dst_bytes == 0)
7098         coding->dst_bytes = 1;
7099       coding->destination = (unsigned char *) xmalloc (coding->dst_bytes);
7100       coding->dst_multibyte = 0;
7101     }
7102   else
7103     {
7104       coding->dst_object = Qnil;
7105       coding->dst_multibyte = 0;
7106     }
7107
7108   encode_coding (coding);
7109
7110   if (EQ (dst_object, Qt))
7111     {
7112       if (BUFFERP (coding->dst_object))
7113         coding->dst_object = Fbuffer_string ();
7114       else
7115         {
7116           coding->dst_object
7117             = make_unibyte_string ((char *) coding->destination,
7118                                    coding->produced);
7119           xfree (coding->destination);
7120         }
7121     }
7122
7123   if (saved_pt >= 0)
7124     {
7125       /* This is the case of:
7126          (BUFFERP (src_object) && EQ (src_object, dst_object))
7127          As we have moved PT while replacing the original buffer
7128          contents, we must recover it now.  */
7129       set_buffer_internal (XBUFFER (src_object));
7130       if (saved_pt < from)
7131         TEMP_SET_PT_BOTH (saved_pt, saved_pt_byte);
7132       else if (saved_pt < from + chars)
7133         TEMP_SET_PT_BOTH (from, from_byte);
7134       else if (! NILP (current_buffer->enable_multibyte_characters))
7135         TEMP_SET_PT_BOTH (saved_pt + (coding->produced_char - chars),
7136                           saved_pt_byte + (coding->produced - bytes));
7137       else
7138         TEMP_SET_PT_BOTH (saved_pt + (coding->produced - bytes),
7139                           saved_pt_byte + (coding->produced - bytes));
7140     }
7141
7142   if (kill_src_buffer)
7143     Fkill_buffer (coding->src_object);
7144   unbind_to (count, Qnil);
7145 }
7146
7147
7148 Lisp_Object
7149 preferred_coding_system ()
7150 {
7151   int id = coding_categories[coding_priorities[0]].id;
7152
7153   return CODING_ID_NAME (id);
7154 }
7155
7156 \f
7157 #ifdef emacs
7158 /*** 8. Emacs Lisp library functions ***/
7159
7160 DEFUN ("coding-system-p", Fcoding_system_p, Scoding_system_p, 1, 1, 0,
7161        doc: /* Return t if OBJECT is nil or a coding-system.
7162 See the documentation of `define-coding-system' for information
7163 about coding-system objects.  */)
7164      (obj)
7165      Lisp_Object obj;
7166 {
7167   if (NILP (obj)
7168       || CODING_SYSTEM_ID (obj) >= 0)
7169     return Qt;
7170   if (! SYMBOLP (obj)
7171       || NILP (Fget (obj, Qcoding_system_define_form)))
7172     return Qnil;
7173   return Qt;
7174 }
7175
7176 DEFUN ("read-non-nil-coding-system", Fread_non_nil_coding_system,
7177        Sread_non_nil_coding_system, 1, 1, 0,
7178        doc: /* Read a coding system from the minibuffer, prompting with string PROMPT.  */)
7179      (prompt)
7180      Lisp_Object prompt;
7181 {
7182   Lisp_Object val;
7183   do
7184     {
7185       val = Fcompleting_read (prompt, Vcoding_system_alist, Qnil,
7186                               Qt, Qnil, Qcoding_system_history, Qnil, Qnil);
7187     }
7188   while (SCHARS (val) == 0);
7189   return (Fintern (val, Qnil));
7190 }
7191
7192 DEFUN ("read-coding-system", Fread_coding_system, Sread_coding_system, 1, 2, 0,
7193        doc: /* Read a coding system from the minibuffer, prompting with string PROMPT.
7194 If the user enters null input, return second argument DEFAULT-CODING-SYSTEM.  */)
7195      (prompt, default_coding_system)
7196      Lisp_Object prompt, default_coding_system;
7197 {
7198   Lisp_Object val;
7199   if (SYMBOLP (default_coding_system))
7200     XSETSTRING (default_coding_system, XPNTR (SYMBOL_NAME (default_coding_system)));
7201   val = Fcompleting_read (prompt, Vcoding_system_alist, Qnil,
7202                           Qt, Qnil, Qcoding_system_history,
7203                           default_coding_system, Qnil);
7204   return (SCHARS (val) == 0 ? Qnil : Fintern (val, Qnil));
7205 }
7206
7207 DEFUN ("check-coding-system", Fcheck_coding_system, Scheck_coding_system,
7208        1, 1, 0,
7209        doc: /* Check validity of CODING-SYSTEM.
7210 If valid, return CODING-SYSTEM, else signal a `coding-system-error' error.
7211 It is valid if it is nil or a symbol defined as a coding system by the
7212 function `define-coding-system'.  */)
7213   (coding_system)
7214      Lisp_Object coding_system;
7215 {
7216   Lisp_Object define_form;
7217
7218   define_form = Fget (coding_system, Qcoding_system_define_form);
7219   if (! NILP (define_form))
7220     {
7221       Fput (coding_system, Qcoding_system_define_form, Qnil);
7222       safe_eval (define_form);
7223     }
7224   if (!NILP (Fcoding_system_p (coding_system)))
7225     return coding_system;
7226   xsignal1 (Qcoding_system_error, coding_system);
7227 }
7228
7229 \f
7230 /* Detect how the bytes at SRC of length SRC_BYTES are encoded.  If
7231    HIGHEST is nonzero, return the coding system of the highest
7232    priority among the detected coding systems.  Otherwize return a
7233    list of detected coding systems sorted by their priorities.  If
7234    MULTIBYTEP is nonzero, it is assumed that the bytes are in correct
7235    multibyte form but contains only ASCII and eight-bit chars.
7236    Otherwise, the bytes are raw bytes.
7237
7238    CODING-SYSTEM controls the detection as below:
7239
7240    If it is nil, detect both text-format and eol-format.  If the
7241    text-format part of CODING-SYSTEM is already specified
7242    (e.g. `iso-latin-1'), detect only eol-format.  If the eol-format
7243    part of CODING-SYSTEM is already specified (e.g. `undecided-unix'),
7244    detect only text-format.  */
7245
7246 Lisp_Object
7247 detect_coding_system (src, src_chars, src_bytes, highest, multibytep,
7248                       coding_system)
7249      const unsigned char *src;
7250      int src_chars, src_bytes, highest;
7251      int multibytep;
7252      Lisp_Object coding_system;
7253 {
7254   const unsigned char *src_end = src + src_bytes;
7255   Lisp_Object attrs, eol_type;
7256   Lisp_Object val;
7257   struct coding_system coding;
7258   int id;
7259   struct coding_detection_info detect_info;
7260   enum coding_category base_category;
7261
7262   if (NILP (coding_system))
7263     coding_system = Qundecided;
7264   setup_coding_system (coding_system, &coding);
7265   attrs = CODING_ID_ATTRS (coding.id);
7266   eol_type = CODING_ID_EOL_TYPE (coding.id);
7267   coding_system = CODING_ATTR_BASE_NAME (attrs);
7268
7269   coding.source = src;
7270   coding.src_chars = src_chars;
7271   coding.src_bytes = src_bytes;
7272   coding.src_multibyte = multibytep;
7273   coding.consumed = 0;
7274   coding.mode |= CODING_MODE_LAST_BLOCK;
7275
7276   detect_info.checked = detect_info.found = detect_info.rejected = 0;
7277
7278   /* At first, detect text-format if necessary.  */
7279   base_category = XINT (CODING_ATTR_CATEGORY (attrs));
7280   if (base_category == coding_category_undecided)
7281     {
7282       enum coding_category category;
7283       struct coding_system *this;
7284       int c, i;
7285
7286       /* Skip all ASCII bytes except for a few ISO2022 controls.  */
7287       for (i = 0; src < src_end; i++, src++)
7288         {
7289           c = *src;
7290           if (c & 0x80)
7291             break;
7292           if (c < 0x20
7293               && (c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO)
7294               && ! inhibit_iso_escape_detection)
7295             {
7296               coding.head_ascii = src - coding.source;
7297               if (detect_coding_iso_2022 (&coding, &detect_info))
7298                 {
7299                   /* We have scanned the whole data.  */
7300                   if (! (detect_info.rejected & CATEGORY_MASK_ISO_7_ELSE))
7301                     /* We didn't find an 8-bit code.  */
7302                     src = src_end;
7303                   break;
7304                 }
7305             }
7306         }
7307       coding.head_ascii = src - coding.source;
7308
7309       if (src < src_end
7310           || detect_info.found)
7311         {
7312           if (src == src_end)
7313             /* As all bytes are 7-bit, we can ignore non-ISO-2022 codings.  */
7314             for (i = 0; i < coding_category_raw_text; i++)
7315               {
7316                 category = coding_priorities[i];
7317                 this = coding_categories + category;
7318                 if (detect_info.found & (1 << category))
7319                   break;
7320               }
7321           else
7322             for (i = 0; i < coding_category_raw_text; i++)
7323               {
7324                 category = coding_priorities[i];
7325                 this = coding_categories + category;
7326
7327                 if (this->id < 0)
7328                   {
7329                     /* No coding system of this category is defined.  */
7330                     detect_info.rejected |= (1 << category);
7331                   }
7332                 else if (category >= coding_category_raw_text)
7333                   continue;
7334                 else if (detect_info.checked & (1 << category))
7335                   {
7336                     if (highest
7337                         && (detect_info.found & (1 << category)))
7338                       break;
7339                   }
7340                 else
7341                   {
7342                     if ((*(this->detector)) (&coding, &detect_info)
7343                         && highest
7344                         && (detect_info.found & (1 << category)))
7345                       {
7346                         if (category == coding_category_utf_16_auto)
7347                           {
7348                             if (detect_info.found & CATEGORY_MASK_UTF_16_LE)
7349                               category = coding_category_utf_16_le;
7350                             else
7351                               category = coding_category_utf_16_be;
7352                           }
7353                         break;
7354                       }
7355                   }
7356               }
7357         }
7358
7359       if (detect_info.rejected == CATEGORY_MASK_ANY)
7360         {
7361           detect_info.found = CATEGORY_MASK_RAW_TEXT;
7362           id = coding_categories[coding_category_raw_text].id;
7363           val = Fcons (make_number (id), Qnil);
7364         }
7365       else if (! detect_info.rejected && ! detect_info.found)
7366         {
7367           detect_info.found = CATEGORY_MASK_ANY;
7368           id = coding_categories[coding_category_undecided].id;
7369           val = Fcons (make_number (id), Qnil);
7370         }
7371       else if (highest)
7372         {
7373           if (detect_info.found)
7374             {
7375               detect_info.found = 1 << category;
7376               val = Fcons (make_number (this->id), Qnil);
7377             }
7378           else
7379             for (i = 0; i < coding_category_raw_text; i++)
7380               if (! (detect_info.rejected & (1 << coding_priorities[i])))
7381                 {
7382                   detect_info.found = 1 << coding_priorities[i];
7383                   id = coding_categories[coding_priorities[i]].id;
7384                   val = Fcons (make_number (id), Qnil);
7385                   break;
7386                 }
7387         }
7388       else
7389         {
7390           int mask = detect_info.rejected | detect_info.found;
7391           int found = 0;
7392           val = Qnil;
7393
7394           for (i = coding_category_raw_text - 1; i >= 0; i--)
7395             {
7396               category = coding_priorities[i];
7397               if (! (mask & (1 << category)))
7398                 {
7399                   found |= 1 << category;
7400                   id = coding_categories[category].id;
7401                   if (id >= 0)
7402                     val = Fcons (make_number (id), val);
7403                 }
7404             }
7405           for (i = coding_category_raw_text - 1; i >= 0; i--)
7406             {
7407               category = coding_priorities[i];
7408               if (detect_info.found & (1 << category))
7409                 {
7410                   id = coding_categories[category].id;
7411                   val = Fcons (make_number (id), val);
7412                 }
7413             }
7414           detect_info.found |= found;
7415         }
7416     }
7417   else if (base_category == coding_category_utf_16_auto)
7418     {
7419       if (detect_coding_utf_16 (&coding, &detect_info))
7420         {
7421           struct coding_system *this;
7422
7423           if (detect_info.found & CATEGORY_MASK_UTF_16_LE)
7424             this = coding_categories + coding_category_utf_16_le;
7425           else if (detect_info.found & CATEGORY_MASK_UTF_16_BE)
7426             this = coding_categories + coding_category_utf_16_be;
7427           else if (detect_info.rejected & CATEGORY_MASK_UTF_16_LE_NOSIG)
7428             this = coding_categories + coding_category_utf_16_be_nosig;
7429           else
7430             this = coding_categories + coding_category_utf_16_le_nosig;
7431           val = Fcons (make_number (this->id), Qnil);
7432         }
7433     }
7434   else
7435     {
7436       detect_info.found = 1 << XINT (CODING_ATTR_CATEGORY (attrs));
7437       val = Fcons (make_number (coding.id), Qnil);
7438     }
7439
7440   /* Then, detect eol-format if necessary.  */
7441   {
7442     int normal_eol = -1, utf_16_be_eol = -1, utf_16_le_eol;
7443     Lisp_Object tail;
7444
7445     if (VECTORP (eol_type))
7446       {
7447         if (detect_info.found & ~CATEGORY_MASK_UTF_16)
7448           normal_eol = detect_eol (coding.source, src_bytes,
7449                                    coding_category_raw_text);
7450         if (detect_info.found & (CATEGORY_MASK_UTF_16_BE
7451                                  | CATEGORY_MASK_UTF_16_BE_NOSIG))
7452           utf_16_be_eol = detect_eol (coding.source, src_bytes,
7453                                       coding_category_utf_16_be);
7454         if (detect_info.found & (CATEGORY_MASK_UTF_16_LE
7455                                  | CATEGORY_MASK_UTF_16_LE_NOSIG))
7456           utf_16_le_eol = detect_eol (coding.source, src_bytes,
7457                                       coding_category_utf_16_le);
7458       }
7459     else
7460       {
7461         if (EQ (eol_type, Qunix))
7462           normal_eol = utf_16_be_eol = utf_16_le_eol = EOL_SEEN_LF;
7463         else if (EQ (eol_type, Qdos))
7464           normal_eol = utf_16_be_eol = utf_16_le_eol = EOL_SEEN_CRLF;
7465         else
7466           normal_eol = utf_16_be_eol = utf_16_le_eol = EOL_SEEN_CR;
7467       }
7468
7469     for (tail = val; CONSP (tail); tail = XCDR (tail))
7470       {
7471         enum coding_category category;
7472         int this_eol;
7473
7474         id = XINT (XCAR (tail));
7475         attrs = CODING_ID_ATTRS (id);
7476         category = XINT (CODING_ATTR_CATEGORY (attrs));
7477         eol_type = CODING_ID_EOL_TYPE (id);
7478         if (VECTORP (eol_type))
7479           {
7480             if (category == coding_category_utf_16_be
7481                 || category == coding_category_utf_16_be_nosig)
7482               this_eol = utf_16_be_eol;
7483             else if (category == coding_category_utf_16_le
7484                      || category == coding_category_utf_16_le_nosig)
7485               this_eol = utf_16_le_eol;
7486             else
7487               this_eol = normal_eol;
7488
7489             if (this_eol == EOL_SEEN_LF)
7490               XSETCAR (tail, AREF (eol_type, 0));
7491             else if (this_eol == EOL_SEEN_CRLF)
7492               XSETCAR (tail, AREF (eol_type, 1));
7493             else if (this_eol == EOL_SEEN_CR)
7494               XSETCAR (tail, AREF (eol_type, 2));
7495             else
7496               XSETCAR (tail, CODING_ID_NAME (id));
7497           }
7498         else
7499           XSETCAR (tail, CODING_ID_NAME (id));
7500       }
7501   }
7502
7503   return (highest ? XCAR (val) : val);
7504 }
7505
7506
7507 DEFUN ("detect-coding-region", Fdetect_coding_region, Sdetect_coding_region,
7508        2, 3, 0,
7509        doc: /* Detect coding system of the text in the region between START and END.
7510 Return a list of possible coding systems ordered by priority.
7511
7512 If only ASCII characters are found (except for such ISO-2022 control
7513 characters ISO-2022 as ESC), it returns a list of single element
7514 `undecided' or its subsidiary coding system according to a detected
7515 end-of-line format.
7516
7517 If optional argument HIGHEST is non-nil, return the coding system of
7518 highest priority.  */)
7519      (start, end, highest)
7520      Lisp_Object start, end, highest;
7521 {
7522   int from, to;
7523   int from_byte, to_byte;
7524
7525   CHECK_NUMBER_COERCE_MARKER (start);
7526   CHECK_NUMBER_COERCE_MARKER (end);
7527
7528   validate_region (&start, &end);
7529   from = XINT (start), to = XINT (end);
7530   from_byte = CHAR_TO_BYTE (from);
7531   to_byte = CHAR_TO_BYTE (to);
7532
7533   if (from < GPT && to >= GPT)
7534     move_gap_both (to, to_byte);
7535
7536   return detect_coding_system (BYTE_POS_ADDR (from_byte),
7537                                to - from, to_byte - from_byte,
7538                                !NILP (highest),
7539                                !NILP (current_buffer
7540                                       ->enable_multibyte_characters),
7541                                Qnil);
7542 }
7543
7544 DEFUN ("detect-coding-string", Fdetect_coding_string, Sdetect_coding_string,
7545        1, 2, 0,
7546        doc: /* Detect coding system of the text in STRING.
7547 Return a list of possible coding systems ordered by priority.
7548
7549 If only ASCII characters are found (except for such ISO-2022 control
7550 characters ISO-2022 as ESC), it returns a list of single element
7551 `undecided' or its subsidiary coding system according to a detected
7552 end-of-line format.
7553
7554 If optional argument HIGHEST is non-nil, return the coding system of
7555 highest priority.  */)
7556      (string, highest)
7557      Lisp_Object string, highest;
7558 {
7559   CHECK_STRING (string);
7560
7561   return detect_coding_system (SDATA (string),
7562                                SCHARS (string), SBYTES (string),
7563                                !NILP (highest), STRING_MULTIBYTE (string),
7564                                Qnil);
7565 }
7566
7567
7568 static INLINE int
7569 char_encodable_p (c, attrs)
7570      int c;
7571      Lisp_Object attrs;
7572 {
7573   Lisp_Object tail;
7574   struct charset *charset;
7575   Lisp_Object translation_table;
7576
7577   translation_table = CODING_ATTR_TRANS_TBL (attrs);
7578   if (! NILP (translation_table))
7579     c = translate_char (translation_table, c);
7580   for (tail = CODING_ATTR_CHARSET_LIST (attrs);
7581        CONSP (tail); tail = XCDR (tail))
7582     {
7583       charset = CHARSET_FROM_ID (XINT (XCAR (tail)));
7584       if (CHAR_CHARSET_P (c, charset))
7585         break;
7586     }
7587   return (! NILP (tail));
7588 }
7589
7590
7591 /* Return a list of coding systems that safely encode the text between
7592    START and END.  If EXCLUDE is non-nil, it is a list of coding
7593    systems not to check.  The returned list doesn't contain any such
7594    coding systems.  In any case, if the text contains only ASCII or is
7595    unibyte, return t.  */
7596
7597 DEFUN ("find-coding-systems-region-internal",
7598        Ffind_coding_systems_region_internal,
7599        Sfind_coding_systems_region_internal, 2, 3, 0,
7600        doc: /* Internal use only.  */)
7601      (start, end, exclude)
7602      Lisp_Object start, end, exclude;
7603 {
7604   Lisp_Object coding_attrs_list, safe_codings;
7605   EMACS_INT start_byte, end_byte;
7606   const unsigned char *p, *pbeg, *pend;
7607   int c;
7608   Lisp_Object tail, elt;
7609
7610   if (STRINGP (start))
7611     {
7612       if (!STRING_MULTIBYTE (start)
7613           || SCHARS (start) == SBYTES (start))
7614         return Qt;
7615       start_byte = 0;
7616       end_byte = SBYTES (start);
7617     }
7618   else
7619     {
7620       CHECK_NUMBER_COERCE_MARKER (start);
7621       CHECK_NUMBER_COERCE_MARKER (end);
7622       if (XINT (start) < BEG || XINT (end) > Z || XINT (start) > XINT (end))
7623         args_out_of_range (start, end);
7624       if (NILP (current_buffer->enable_multibyte_characters))
7625         return Qt;
7626       start_byte = CHAR_TO_BYTE (XINT (start));
7627       end_byte = CHAR_TO_BYTE (XINT (end));
7628       if (XINT (end) - XINT (start) == end_byte - start_byte)
7629         return Qt;
7630
7631       if (XINT (start) < GPT && XINT (end) > GPT)
7632         {
7633           if ((GPT - XINT (start)) < (XINT (end) - GPT))
7634             move_gap_both (XINT (start), start_byte);
7635           else
7636             move_gap_both (XINT (end), end_byte);
7637         }
7638     }
7639
7640   coding_attrs_list = Qnil;
7641   for (tail = Vcoding_system_list; CONSP (tail); tail = XCDR (tail))
7642     if (NILP (exclude)
7643         || NILP (Fmemq (XCAR (tail), exclude)))
7644       {
7645         Lisp_Object attrs;
7646
7647         attrs = AREF (CODING_SYSTEM_SPEC (XCAR (tail)), 0);
7648         if (EQ (XCAR (tail), CODING_ATTR_BASE_NAME (attrs))
7649             && ! EQ (CODING_ATTR_TYPE (attrs), Qundecided))
7650           {
7651             ASET (attrs, coding_attr_trans_tbl,
7652                   get_translation_table (attrs, 1, NULL));
7653             coding_attrs_list = Fcons (attrs, coding_attrs_list);
7654           }
7655       }
7656
7657   if (STRINGP (start))
7658     p = pbeg = SDATA (start);
7659   else
7660     p = pbeg = BYTE_POS_ADDR (start_byte);
7661   pend = p + (end_byte - start_byte);
7662
7663   while (p < pend && ASCII_BYTE_P (*p)) p++;
7664   while (p < pend && ASCII_BYTE_P (*(pend - 1))) pend--;
7665
7666   while (p < pend)
7667     {
7668       if (ASCII_BYTE_P (*p))
7669         p++;
7670       else
7671         {
7672           c = STRING_CHAR_ADVANCE (p);
7673
7674           charset_map_loaded = 0;
7675           for (tail = coding_attrs_list; CONSP (tail);)
7676             {
7677               elt = XCAR (tail);
7678               if (NILP (elt))
7679                 tail = XCDR (tail);
7680               else if (char_encodable_p (c, elt))
7681                 tail = XCDR (tail);
7682               else if (CONSP (XCDR (tail)))
7683                 {
7684                   XSETCAR (tail, XCAR (XCDR (tail)));
7685                   XSETCDR (tail, XCDR (XCDR (tail)));
7686                 }
7687               else
7688                 {
7689                   XSETCAR (tail, Qnil);
7690                   tail = XCDR (tail);
7691                 }
7692             }
7693           if (charset_map_loaded)
7694             {
7695               EMACS_INT p_offset = p - pbeg, pend_offset = pend - pbeg;
7696
7697               if (STRINGP (start))
7698                 pbeg = SDATA (start);
7699               else
7700                 pbeg = BYTE_POS_ADDR (start_byte);
7701               p = pbeg + p_offset;
7702               pend = pbeg + pend_offset;
7703             }
7704         }
7705     }
7706
7707   safe_codings = list2 (Qraw_text, Qno_conversion);
7708   for (tail = coding_attrs_list; CONSP (tail); tail = XCDR (tail))
7709     if (! NILP (XCAR (tail)))
7710       safe_codings = Fcons (CODING_ATTR_BASE_NAME (XCAR (tail)), safe_codings);
7711
7712   return safe_codings;
7713 }
7714
7715
7716 DEFUN ("unencodable-char-position", Funencodable_char_position,
7717        Sunencodable_char_position, 3, 5, 0,
7718        doc: /*
7719 Return position of first un-encodable character in a region.
7720 START and END specfiy the region and CODING-SYSTEM specifies the
7721 encoding to check.  Return nil if CODING-SYSTEM does encode the region.
7722
7723 If optional 4th argument COUNT is non-nil, it specifies at most how
7724 many un-encodable characters to search.  In this case, the value is a
7725 list of positions.
7726
7727 If optional 5th argument STRING is non-nil, it is a string to search
7728 for un-encodable characters.  In that case, START and END are indexes
7729 to the string.  */)
7730      (start, end, coding_system, count, string)
7731      Lisp_Object start, end, coding_system, count, string;
7732 {
7733   int n;
7734   struct coding_system coding;
7735   Lisp_Object attrs, charset_list, translation_table;
7736   Lisp_Object positions;
7737   int from, to;
7738   const unsigned char *p, *stop, *pend;
7739   int ascii_compatible;
7740
7741   setup_coding_system (Fcheck_coding_system (coding_system), &coding);
7742   attrs = CODING_ID_ATTRS (coding.id);
7743   if (EQ (CODING_ATTR_TYPE (attrs), Qraw_text))
7744     return Qnil;
7745   ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
7746   charset_list = CODING_ATTR_CHARSET_LIST (attrs);
7747   translation_table = get_translation_table (attrs, 1, NULL);
7748
7749   if (NILP (string))
7750     {
7751       validate_region (&start, &end);
7752       from = XINT (start);
7753       to = XINT (end);
7754       if (NILP (current_buffer->enable_multibyte_characters)
7755           || (ascii_compatible
7756               && (to - from) == (CHAR_TO_BYTE (to) - (CHAR_TO_BYTE (from)))))
7757         return Qnil;
7758       p = CHAR_POS_ADDR (from);
7759       pend = CHAR_POS_ADDR (to);
7760       if (from < GPT && to >= GPT)
7761         stop = GPT_ADDR;
7762       else
7763         stop = pend;
7764     }
7765   else
7766     {
7767       CHECK_STRING (string);
7768       CHECK_NATNUM (start);
7769       CHECK_NATNUM (end);
7770       from = XINT (start);
7771       to = XINT (end);
7772       if (from > to
7773           || to > SCHARS (string))
7774         args_out_of_range_3 (string, start, end);
7775       if (! STRING_MULTIBYTE (string))
7776         return Qnil;
7777       p = SDATA (string) + string_char_to_byte (string, from);
7778       stop = pend = SDATA (string) + string_char_to_byte (string, to);
7779       if (ascii_compatible && (to - from) == (pend - p))
7780         return Qnil;
7781     }
7782
7783   if (NILP (count))
7784     n = 1;
7785   else
7786     {
7787       CHECK_NATNUM (count);
7788       n = XINT (count);
7789     }
7790
7791   positions = Qnil;
7792   while (1)
7793     {
7794       int c;
7795
7796       if (ascii_compatible)
7797         while (p < stop && ASCII_BYTE_P (*p))
7798           p++, from++;
7799       if (p >= stop)
7800         {
7801           if (p >= pend)
7802             break;
7803           stop = pend;
7804           p = GAP_END_ADDR;
7805         }
7806
7807       c = STRING_CHAR_ADVANCE (p);
7808       if (! (ASCII_CHAR_P (c) && ascii_compatible)
7809           && ! char_charset (translate_char (translation_table, c),
7810                              charset_list, NULL))
7811         {
7812           positions = Fcons (make_number (from), positions);
7813           n--;
7814           if (n == 0)
7815             break;
7816         }
7817
7818       from++;
7819     }
7820
7821   return (NILP (count) ? Fcar (positions) : Fnreverse (positions));
7822 }
7823
7824
7825 DEFUN ("check-coding-systems-region", Fcheck_coding_systems_region,
7826        Scheck_coding_systems_region, 3, 3, 0,
7827        doc: /* Check if the region is encodable by coding systems.
7828
7829 START and END are buffer positions specifying the region.
7830 CODING-SYSTEM-LIST is a list of coding systems to check.
7831
7832 The value is an alist ((CODING-SYSTEM POS0 POS1 ...) ...), where
7833 CODING-SYSTEM is a member of CODING-SYSTEM-LIst and can't encode the
7834 whole region, POS0, POS1, ... are buffer positions where non-encodable
7835 characters are found.
7836
7837 If all coding systems in CODING-SYSTEM-LIST can encode the region, the
7838 value is nil.
7839
7840 START may be a string.  In that case, check if the string is
7841 encodable, and the value contains indices to the string instead of
7842 buffer positions.  END is ignored.  */)
7843      (start, end, coding_system_list)
7844      Lisp_Object start, end, coding_system_list;
7845 {
7846   Lisp_Object list;
7847   EMACS_INT start_byte, end_byte;
7848   int pos;
7849   const unsigned char *p, *pbeg, *pend;
7850   int c;
7851   Lisp_Object tail, elt, attrs;
7852
7853   if (STRINGP (start))
7854     {
7855       if (!STRING_MULTIBYTE (start)
7856           && SCHARS (start) != SBYTES (start))
7857         return Qnil;
7858       start_byte = 0;
7859       end_byte = SBYTES (start);
7860       pos = 0;
7861     }
7862   else
7863     {
7864       CHECK_NUMBER_COERCE_MARKER (start);
7865       CHECK_NUMBER_COERCE_MARKER (end);
7866       if (XINT (start) < BEG || XINT (end) > Z || XINT (start) > XINT (end))
7867         args_out_of_range (start, end);
7868       if (NILP (current_buffer->enable_multibyte_characters))
7869         return Qnil;
7870       start_byte = CHAR_TO_BYTE (XINT (start));
7871       end_byte = CHAR_TO_BYTE (XINT (end));
7872       if (XINT (end) - XINT (start) == end_byte - start_byte)
7873         return Qt;
7874
7875       if (XINT (start) < GPT && XINT (end) > GPT)
7876         {
7877           if ((GPT - XINT (start)) < (XINT (end) - GPT))
7878             move_gap_both (XINT (start), start_byte);
7879           else
7880             move_gap_both (XINT (end), end_byte);
7881         }
7882       pos = XINT (start);
7883     }
7884
7885   list = Qnil;
7886   for (tail = coding_system_list; CONSP (tail); tail = XCDR (tail))
7887     {
7888       elt = XCAR (tail);
7889       attrs = AREF (CODING_SYSTEM_SPEC (elt), 0);
7890       ASET (attrs, coding_attr_trans_tbl,
7891             get_translation_table (attrs, 1, NULL));
7892       list = Fcons (Fcons (elt, Fcons (attrs, Qnil)), list);
7893     }
7894
7895   if (STRINGP (start))
7896     p = pbeg = SDATA (start);
7897   else
7898     p = pbeg = BYTE_POS_ADDR (start_byte);
7899   pend = p + (end_byte - start_byte);
7900
7901   while (p < pend && ASCII_BYTE_P (*p)) p++, pos++;
7902   while (p < pend && ASCII_BYTE_P (*(pend - 1))) pend--;
7903
7904   while (p < pend)
7905     {
7906       if (ASCII_BYTE_P (*p))
7907         p++;
7908       else
7909         {
7910           c = STRING_CHAR_ADVANCE (p);
7911
7912           charset_map_loaded = 0;
7913           for (tail = list; CONSP (tail); tail = XCDR (tail))
7914             {
7915               elt = XCDR (XCAR (tail));
7916               if (! char_encodable_p (c, XCAR (elt)))
7917                 XSETCDR (elt, Fcons (make_number (pos), XCDR (elt)));
7918             }
7919           if (charset_map_loaded)
7920             {
7921               EMACS_INT p_offset = p - pbeg, pend_offset = pend - pbeg;
7922
7923               if (STRINGP (start))
7924                 pbeg = SDATA (start);
7925               else
7926                 pbeg = BYTE_POS_ADDR (start_byte);
7927               p = pbeg + p_offset;
7928               pend = pbeg + pend_offset;
7929             }
7930         }
7931       pos++;
7932     }
7933
7934   tail = list;
7935   list = Qnil;
7936   for (; CONSP (tail); tail = XCDR (tail))
7937     {
7938       elt = XCAR (tail);
7939       if (CONSP (XCDR (XCDR (elt))))
7940         list = Fcons (Fcons (XCAR (elt), Fnreverse (XCDR (XCDR (elt)))),
7941                       list);
7942     }
7943
7944   return list;
7945 }
7946
7947
7948 Lisp_Object
7949 code_convert_region (start, end, coding_system, dst_object, encodep, norecord)
7950      Lisp_Object start, end, coding_system, dst_object;
7951      int encodep, norecord;
7952 {
7953   struct coding_system coding;
7954   EMACS_INT from, from_byte, to, to_byte;
7955   Lisp_Object src_object;
7956
7957   CHECK_NUMBER_COERCE_MARKER (start);
7958   CHECK_NUMBER_COERCE_MARKER (end);
7959   if (NILP (coding_system))
7960     coding_system = Qno_conversion;
7961   else
7962     CHECK_CODING_SYSTEM (coding_system);
7963   src_object = Fcurrent_buffer ();
7964   if (NILP (dst_object))
7965     dst_object = src_object;
7966   else if (! EQ (dst_object, Qt))
7967     CHECK_BUFFER (dst_object);
7968
7969   validate_region (&start, &end);
7970   from = XFASTINT (start);
7971   from_byte = CHAR_TO_BYTE (from);
7972   to = XFASTINT (end);
7973   to_byte = CHAR_TO_BYTE (to);
7974
7975   setup_coding_system (coding_system, &coding);
7976   coding.mode |= CODING_MODE_LAST_BLOCK;
7977
7978   if (encodep)
7979     encode_coding_object (&coding, src_object, from, from_byte, to, to_byte,
7980                           dst_object);
7981   else
7982     decode_coding_object (&coding, src_object, from, from_byte, to, to_byte,
7983                           dst_object);
7984   if (! norecord)
7985     Vlast_coding_system_used = CODING_ID_NAME (coding.id);
7986
7987   return (BUFFERP (dst_object)
7988           ? make_number (coding.produced_char)
7989           : coding.dst_object);
7990 }
7991
7992
7993 DEFUN ("decode-coding-region", Fdecode_coding_region, Sdecode_coding_region,
7994        3, 4, "r\nzCoding system: ",
7995        doc: /* Decode the current region from the specified coding system.
7996 When called from a program, takes four arguments:
7997         START, END, CODING-SYSTEM, and DESTINATION.
7998 START and END are buffer positions.
7999
8000 Optional 4th arguments DESTINATION specifies where the decoded text goes.
8001 If nil, the region between START and END is replace by the decoded text.
8002 If buffer, the decoded text is inserted in the buffer.
8003 If t, the decoded text is returned.
8004
8005 This function sets `last-coding-system-used' to the precise coding system
8006 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
8007 not fully specified.)
8008 It returns the length of the decoded text.  */)
8009      (start, end, coding_system, destination)
8010      Lisp_Object start, end, coding_system, destination;
8011 {
8012   return code_convert_region (start, end, coding_system, destination, 0, 0);
8013 }
8014
8015 DEFUN ("encode-coding-region", Fencode_coding_region, Sencode_coding_region,
8016        3, 4, "r\nzCoding system: ",
8017        doc: /* Encode the current region by specified coding system.
8018 When called from a program, takes three arguments:
8019 START, END, and CODING-SYSTEM.  START and END are buffer positions.
8020
8021 Optional 4th arguments DESTINATION specifies where the encoded text goes.
8022 If nil, the region between START and END is replace by the encoded text.
8023 If buffer, the encoded text is inserted in the buffer.
8024 If t, the encoded text is returned.
8025
8026 This function sets `last-coding-system-used' to the precise coding system
8027 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
8028 not fully specified.)
8029 It returns the length of the encoded text.  */)
8030   (start, end, coding_system, destination)
8031      Lisp_Object start, end, coding_system, destination;
8032 {
8033   return code_convert_region (start, end, coding_system, destination, 1, 0);
8034 }
8035
8036 Lisp_Object
8037 code_convert_string (string, coding_system, dst_object,
8038                      encodep, nocopy, norecord)
8039      Lisp_Object string, coding_system, dst_object;
8040      int encodep, nocopy, norecord;
8041 {
8042   struct coding_system coding;
8043   EMACS_INT chars, bytes;
8044
8045   CHECK_STRING (string);
8046   if (NILP (coding_system))
8047     {
8048       if (! norecord)
8049         Vlast_coding_system_used = Qno_conversion;
8050       if (NILP (dst_object))
8051         return (nocopy ? Fcopy_sequence (string) : string);
8052     }
8053
8054   if (NILP (coding_system))
8055     coding_system = Qno_conversion;
8056   else
8057     CHECK_CODING_SYSTEM (coding_system);
8058   if (NILP (dst_object))
8059     dst_object = Qt;
8060   else if (! EQ (dst_object, Qt))
8061     CHECK_BUFFER (dst_object);
8062
8063   setup_coding_system (coding_system, &coding);
8064   coding.mode |= CODING_MODE_LAST_BLOCK;
8065   chars = SCHARS (string);
8066   bytes = SBYTES (string);
8067   if (encodep)
8068     encode_coding_object (&coding, string, 0, 0, chars, bytes, dst_object);
8069   else
8070     decode_coding_object (&coding, string, 0, 0, chars, bytes, dst_object);
8071   if (! norecord)
8072     Vlast_coding_system_used = CODING_ID_NAME (coding.id);
8073
8074   return (BUFFERP (dst_object)
8075           ? make_number (coding.produced_char)
8076           : coding.dst_object);
8077 }
8078
8079
8080 /* Encode or decode STRING according to CODING_SYSTEM.
8081    Do not set Vlast_coding_system_used.
8082
8083    This function is called only from macros DECODE_FILE and
8084    ENCODE_FILE, thus we ignore character composition.  */
8085
8086 Lisp_Object
8087 code_convert_string_norecord (string, coding_system, encodep)
8088      Lisp_Object string, coding_system;
8089      int encodep;
8090 {
8091   return code_convert_string (string, coding_system, Qt, encodep, 0, 1);
8092 }
8093
8094
8095 DEFUN ("decode-coding-string", Fdecode_coding_string, Sdecode_coding_string,
8096        2, 4, 0,
8097        doc: /* Decode STRING which is encoded in CODING-SYSTEM, and return the result.
8098
8099 Optional third arg NOCOPY non-nil means it is OK to return STRING itself
8100 if the decoding operation is trivial.
8101
8102 Optional fourth arg BUFFER non-nil meant that the decoded text is
8103 inserted in BUFFER instead of returned as a string.  In this case,
8104 the return value is BUFFER.
8105
8106 This function sets `last-coding-system-used' to the precise coding system
8107 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
8108 not fully specified.  */)
8109   (string, coding_system, nocopy, buffer)
8110      Lisp_Object string, coding_system, nocopy, buffer;
8111 {
8112   return code_convert_string (string, coding_system, buffer,
8113                               0, ! NILP (nocopy), 0);
8114 }
8115
8116 DEFUN ("encode-coding-string", Fencode_coding_string, Sencode_coding_string,
8117        2, 4, 0,
8118        doc: /* Encode STRING to CODING-SYSTEM, and return the result.
8119
8120 Optional third arg NOCOPY non-nil means it is OK to return STRING
8121 itself if the encoding operation is trivial.
8122
8123 Optional fourth arg BUFFER non-nil meant that the encoded text is
8124 inserted in BUFFER instead of returned as a string.  In this case,
8125 the return value is BUFFER.
8126
8127 This function sets `last-coding-system-used' to the precise coding system
8128 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
8129 not fully specified.)  */)
8130      (string, coding_system, nocopy, buffer)
8131      Lisp_Object string, coding_system, nocopy, buffer;
8132 {
8133   return code_convert_string (string, coding_system, buffer,
8134                               1, ! NILP (nocopy), 1);
8135 }
8136
8137 \f
8138 DEFUN ("decode-sjis-char", Fdecode_sjis_char, Sdecode_sjis_char, 1, 1, 0,
8139        doc: /* Decode a Japanese character which has CODE in shift_jis encoding.
8140 Return the corresponding character.  */)
8141      (code)
8142      Lisp_Object code;
8143 {
8144   Lisp_Object spec, attrs, val;
8145   struct charset *charset_roman, *charset_kanji, *charset_kana, *charset;
8146   int c;
8147
8148   CHECK_NATNUM (code);
8149   c = XFASTINT (code);
8150   CHECK_CODING_SYSTEM_GET_SPEC (Vsjis_coding_system, spec);
8151   attrs = AREF (spec, 0);
8152
8153   if (ASCII_BYTE_P (c)
8154       && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
8155     return code;
8156
8157   val = CODING_ATTR_CHARSET_LIST (attrs);
8158   charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
8159   charset_kana = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
8160   charset_kanji = CHARSET_FROM_ID (XINT (XCAR (val)));
8161
8162   if (c <= 0x7F)
8163     charset = charset_roman;
8164   else if (c >= 0xA0 && c < 0xDF)
8165     {
8166       charset = charset_kana;
8167       c -= 0x80;
8168     }
8169   else
8170     {
8171       int s1 = c >> 8, s2 = c & 0xFF;
8172
8173       if (s1 < 0x81 || (s1 > 0x9F && s1 < 0xE0) || s1 > 0xEF
8174           || s2 < 0x40 || s2 == 0x7F || s2 > 0xFC)
8175         error ("Invalid code: %d", code);
8176       SJIS_TO_JIS (c);
8177       charset = charset_kanji;
8178     }
8179   c = DECODE_CHAR (charset, c);
8180   if (c < 0)
8181     error ("Invalid code: %d", code);
8182   return make_number (c);
8183 }
8184
8185
8186 DEFUN ("encode-sjis-char", Fencode_sjis_char, Sencode_sjis_char, 1, 1, 0,
8187        doc: /* Encode a Japanese character CH to shift_jis encoding.
8188 Return the corresponding code in SJIS.  */)
8189      (ch)
8190     Lisp_Object ch;
8191 {
8192   Lisp_Object spec, attrs, charset_list;
8193   int c;
8194   struct charset *charset;
8195   unsigned code;
8196
8197   CHECK_CHARACTER (ch);
8198   c = XFASTINT (ch);
8199   CHECK_CODING_SYSTEM_GET_SPEC (Vsjis_coding_system, spec);
8200   attrs = AREF (spec, 0);
8201
8202   if (ASCII_CHAR_P (c)
8203       && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
8204     return ch;
8205
8206   charset_list = CODING_ATTR_CHARSET_LIST (attrs);
8207   charset = char_charset (c, charset_list, &code);
8208   if (code == CHARSET_INVALID_CODE (charset))
8209     error ("Can't encode by shift_jis encoding: %d", c);
8210   JIS_TO_SJIS (code);
8211
8212   return make_number (code);
8213 }
8214
8215 DEFUN ("decode-big5-char", Fdecode_big5_char, Sdecode_big5_char, 1, 1, 0,
8216        doc: /* Decode a Big5 character which has CODE in BIG5 coding system.
8217 Return the corresponding character.  */)
8218      (code)
8219      Lisp_Object code;
8220 {
8221   Lisp_Object spec, attrs, val;
8222   struct charset *charset_roman, *charset_big5, *charset;
8223   int c;
8224
8225   CHECK_NATNUM (code);
8226   c = XFASTINT (code);
8227   CHECK_CODING_SYSTEM_GET_SPEC (Vbig5_coding_system, spec);
8228   attrs = AREF (spec, 0);
8229
8230   if (ASCII_BYTE_P (c)
8231       && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
8232     return code;
8233
8234   val = CODING_ATTR_CHARSET_LIST (attrs);
8235   charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
8236   charset_big5 = CHARSET_FROM_ID (XINT (XCAR (val)));
8237
8238   if (c <= 0x7F)
8239     charset = charset_roman;
8240   else
8241     {
8242       int b1 = c >> 8, b2 = c & 0x7F;
8243       if (b1 < 0xA1 || b1 > 0xFE
8244           || b2 < 0x40 || (b2 > 0x7E && b2 < 0xA1) || b2 > 0xFE)
8245         error ("Invalid code: %d", code);
8246       charset = charset_big5;
8247     }
8248   c = DECODE_CHAR (charset, (unsigned )c);
8249   if (c < 0)
8250     error ("Invalid code: %d", code);
8251   return make_number (c);
8252 }
8253
8254 DEFUN ("encode-big5-char", Fencode_big5_char, Sencode_big5_char, 1, 1, 0,
8255        doc: /* Encode the Big5 character CH to BIG5 coding system.
8256 Return the corresponding character code in Big5.  */)
8257      (ch)
8258      Lisp_Object ch;
8259 {
8260   Lisp_Object spec, attrs, charset_list;
8261   struct charset *charset;
8262   int c;
8263   unsigned code;
8264
8265   CHECK_CHARACTER (ch);
8266   c = XFASTINT (ch);
8267   CHECK_CODING_SYSTEM_GET_SPEC (Vbig5_coding_system, spec);
8268   attrs = AREF (spec, 0);
8269   if (ASCII_CHAR_P (c)
8270       && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
8271     return ch;
8272
8273   charset_list = CODING_ATTR_CHARSET_LIST (attrs);
8274   charset = char_charset (c, charset_list, &code);
8275   if (code == CHARSET_INVALID_CODE (charset))
8276     error ("Can't encode by Big5 encoding: %d", c);
8277
8278   return make_number (code);
8279 }
8280
8281 \f
8282 DEFUN ("set-terminal-coding-system-internal",
8283        Fset_terminal_coding_system_internal,
8284        Sset_terminal_coding_system_internal, 1, 1, 0,
8285        doc: /* Internal use only.  */)
8286      (coding_system)
8287      Lisp_Object coding_system;
8288 {
8289   CHECK_SYMBOL (coding_system);
8290   setup_coding_system (Fcheck_coding_system (coding_system),
8291                         &terminal_coding);
8292
8293   /* We had better not send unsafe characters to terminal.  */
8294   terminal_coding.mode |= CODING_MODE_SAFE_ENCODING;
8295   /* Characer composition should be disabled.  */
8296   terminal_coding.common_flags &= ~CODING_ANNOTATE_COMPOSITION_MASK;
8297   terminal_coding.src_multibyte = 1;
8298   terminal_coding.dst_multibyte = 0;
8299   return Qnil;
8300 }
8301
8302 DEFUN ("set-safe-terminal-coding-system-internal",
8303        Fset_safe_terminal_coding_system_internal,
8304        Sset_safe_terminal_coding_system_internal, 1, 1, 0,
8305        doc: /* Internal use only.  */)
8306      (coding_system)
8307      Lisp_Object coding_system;
8308 {
8309   CHECK_SYMBOL (coding_system);
8310   setup_coding_system (Fcheck_coding_system (coding_system),
8311                        &safe_terminal_coding);
8312   /* Characer composition should be disabled.  */
8313   safe_terminal_coding.common_flags &= ~CODING_ANNOTATE_COMPOSITION_MASK;
8314   safe_terminal_coding.src_multibyte = 1;
8315   safe_terminal_coding.dst_multibyte = 0;
8316   return Qnil;
8317 }
8318
8319 DEFUN ("terminal-coding-system",
8320        Fterminal_coding_system, Sterminal_coding_system, 0, 0, 0,
8321        doc: /* Return coding system specified for terminal output.  */)
8322      ()
8323 {
8324   Lisp_Object coding_system;
8325
8326   coding_system = CODING_ID_NAME (terminal_coding.id);
8327   /* For backward compatibility, return nil if it is `undecided'. */
8328   return (! EQ (coding_system, Qundecided) ? coding_system : Qnil);
8329 }
8330
8331 DEFUN ("set-keyboard-coding-system-internal",
8332        Fset_keyboard_coding_system_internal,
8333        Sset_keyboard_coding_system_internal, 1, 1, 0,
8334        doc: /* Internal use only.  */)
8335      (coding_system)
8336      Lisp_Object coding_system;
8337 {
8338   CHECK_SYMBOL (coding_system);
8339   setup_coding_system (Fcheck_coding_system (coding_system),
8340                        &keyboard_coding);
8341   /* Characer composition should be disabled.  */
8342   keyboard_coding.common_flags &= ~CODING_ANNOTATE_COMPOSITION_MASK;
8343   return Qnil;
8344 }
8345
8346 DEFUN ("keyboard-coding-system",
8347        Fkeyboard_coding_system, Skeyboard_coding_system, 0, 0, 0,
8348        doc: /* Return coding system specified for decoding keyboard input.  */)
8349      ()
8350 {
8351   return CODING_ID_NAME (keyboard_coding.id);
8352 }
8353
8354 \f
8355 DEFUN ("find-operation-coding-system", Ffind_operation_coding_system,
8356        Sfind_operation_coding_system,  1, MANY, 0,
8357        doc: /* Choose a coding system for an operation based on the target name.
8358 The value names a pair of coding systems: (DECODING-SYSTEM . ENCODING-SYSTEM).
8359 DECODING-SYSTEM is the coding system to use for decoding
8360 \(in case OPERATION does decoding), and ENCODING-SYSTEM is the coding system
8361 for encoding (in case OPERATION does encoding).
8362
8363 The first argument OPERATION specifies an I/O primitive:
8364   For file I/O, `insert-file-contents' or `write-region'.
8365   For process I/O, `call-process', `call-process-region', or `start-process'.
8366   For network I/O, `open-network-stream'.
8367
8368 The remaining arguments should be the same arguments that were passed
8369 to the primitive.  Depending on which primitive, one of those arguments
8370 is selected as the TARGET.  For example, if OPERATION does file I/O,
8371 whichever argument specifies the file name is TARGET.
8372
8373 TARGET has a meaning which depends on OPERATION:
8374   For file I/O, TARGET is a file name (except for the special case below).
8375   For process I/O, TARGET is a process name.
8376   For network I/O, TARGET is a service name or a port number
8377
8378 This function looks up what specified for TARGET in,
8379 `file-coding-system-alist', `process-coding-system-alist',
8380 or `network-coding-system-alist' depending on OPERATION.
8381 They may specify a coding system, a cons of coding systems,
8382 or a function symbol to call.
8383 In the last case, we call the function with one argument,
8384 which is a list of all the arguments given to this function.
8385
8386 If OPERATION is `insert-file-contents', the argument corresponding to
8387 TARGET may be a cons (FILENAME . BUFFER).  In that case, FILENAME is a
8388 file name to look up, and BUFFER is a buffer that contains the file's
8389 contents (not yet decoded).  If `file-coding-system-alist' specifies a
8390 function to call for FILENAME, that function should examine the
8391 contents of BUFFER instead of reading the file.
8392
8393 usage: (find-operation-coding-system OPERATION ARGUMENTS ...)  */)
8394      (nargs, args)
8395      int nargs;
8396      Lisp_Object *args;
8397 {
8398   Lisp_Object operation, target_idx, target, val;
8399   register Lisp_Object chain;
8400
8401   if (nargs < 2)
8402     error ("Too few arguments");
8403   operation = args[0];
8404   if (!SYMBOLP (operation)
8405       || !INTEGERP (target_idx = Fget (operation, Qtarget_idx)))
8406     error ("Invalid first arguement");
8407   if (nargs < 1 + XINT (target_idx))
8408     error ("Too few arguments for operation: %s",
8409            SDATA (SYMBOL_NAME (operation)));
8410   target = args[XINT (target_idx) + 1];
8411   if (!(STRINGP (target)
8412         || (EQ (operation, Qinsert_file_contents) && CONSP (target)
8413             && STRINGP (XCAR (target)) && BUFFERP (XCDR (target)))
8414         || (EQ (operation, Qopen_network_stream) && INTEGERP (target))))
8415     error ("Invalid %dth argument", XINT (target_idx) + 1);
8416   if (CONSP (target))
8417     target = XCAR (target);
8418
8419   chain = ((EQ (operation, Qinsert_file_contents)
8420             || EQ (operation, Qwrite_region))
8421            ? Vfile_coding_system_alist
8422            : (EQ (operation, Qopen_network_stream)
8423               ? Vnetwork_coding_system_alist
8424               : Vprocess_coding_system_alist));
8425   if (NILP (chain))
8426     return Qnil;
8427
8428   for (; CONSP (chain); chain = XCDR (chain))
8429     {
8430       Lisp_Object elt;
8431
8432       elt = XCAR (chain);
8433       if (CONSP (elt)
8434           && ((STRINGP (target)
8435                && STRINGP (XCAR (elt))
8436                && fast_string_match (XCAR (elt), target) >= 0)
8437               || (INTEGERP (target) && EQ (target, XCAR (elt)))))
8438         {
8439           val = XCDR (elt);
8440           /* Here, if VAL is both a valid coding system and a valid
8441              function symbol, we return VAL as a coding system.  */
8442           if (CONSP (val))
8443             return val;
8444           if (! SYMBOLP (val))
8445             return Qnil;
8446           if (! NILP (Fcoding_system_p (val)))
8447             return Fcons (val, val);
8448           if (! NILP (Ffboundp (val)))
8449             {
8450               /* We use call1 rather than safe_call1
8451                  so as to get bug reports about functions called here
8452                  which don't handle the current interface.  */
8453               val = call1 (val, Flist (nargs, args));
8454               if (CONSP (val))
8455                 return val;
8456               if (SYMBOLP (val) && ! NILP (Fcoding_system_p (val)))
8457                 return Fcons (val, val);
8458             }
8459           return Qnil;
8460         }
8461     }
8462   return Qnil;
8463 }
8464
8465 DEFUN ("set-coding-system-priority", Fset_coding_system_priority,
8466        Sset_coding_system_priority, 0, MANY, 0,
8467        doc: /* Assign higher priority to the coding systems given as arguments.
8468 If multiple coding systems belongs to the same category,
8469 all but the first one are ignored.
8470
8471 usage: (set-coding-system-priority ...)  */)
8472      (nargs, args)
8473      int nargs;
8474      Lisp_Object *args;
8475 {
8476   int i, j;
8477   int changed[coding_category_max];
8478   enum coding_category priorities[coding_category_max];
8479
8480   bzero (changed, sizeof changed);
8481
8482   for (i = j = 0; i < nargs; i++)
8483     {
8484       enum coding_category category;
8485       Lisp_Object spec, attrs;
8486
8487       CHECK_CODING_SYSTEM_GET_SPEC (args[i], spec);
8488       attrs = AREF (spec, 0);
8489       category = XINT (CODING_ATTR_CATEGORY (attrs));
8490       if (changed[category])
8491         /* Ignore this coding system because a coding system of the
8492            same category already had a higher priority.  */
8493         continue;
8494       changed[category] = 1;
8495       priorities[j++] = category;
8496       if (coding_categories[category].id >= 0
8497           && ! EQ (args[i], CODING_ID_NAME (coding_categories[category].id)))
8498         setup_coding_system (args[i], &coding_categories[category]);
8499       Fset (AREF (Vcoding_category_table, category), args[i]);
8500     }
8501
8502   /* Now we have decided top J priorities.  Reflect the order of the
8503      original priorities to the remaining priorities.  */
8504
8505   for (i = j, j = 0; i < coding_category_max; i++, j++)
8506     {
8507       while (j < coding_category_max
8508              && changed[coding_priorities[j]])
8509         j++;
8510       if (j == coding_category_max)
8511         abort ();
8512       priorities[i] = coding_priorities[j];
8513     }
8514
8515   bcopy (priorities, coding_priorities, sizeof priorities);
8516
8517   /* Update `coding-category-list'.  */
8518   Vcoding_category_list = Qnil;
8519   for (i = coding_category_max - 1; i >= 0; i--)
8520     Vcoding_category_list
8521       = Fcons (AREF (Vcoding_category_table, priorities[i]),
8522                Vcoding_category_list);
8523
8524   return Qnil;
8525 }
8526
8527 DEFUN ("coding-system-priority-list", Fcoding_system_priority_list,
8528        Scoding_system_priority_list, 0, 1, 0,
8529        doc: /* Return a list of coding systems ordered by their priorities.
8530 HIGHESTP non-nil means just return the highest priority one.  */)
8531      (highestp)
8532      Lisp_Object highestp;
8533 {
8534   int i;
8535   Lisp_Object val;
8536
8537   for (i = 0, val = Qnil; i < coding_category_max; i++)
8538     {
8539       enum coding_category category = coding_priorities[i];
8540       int id = coding_categories[category].id;
8541       Lisp_Object attrs;
8542
8543       if (id < 0)
8544         continue;
8545       attrs = CODING_ID_ATTRS (id);
8546       if (! NILP (highestp))
8547         return CODING_ATTR_BASE_NAME (attrs);
8548       val = Fcons (CODING_ATTR_BASE_NAME (attrs), val);
8549     }
8550   return Fnreverse (val);
8551 }
8552
8553 static char *suffixes[] = { "-unix", "-dos", "-mac" };
8554
8555 static Lisp_Object
8556 make_subsidiaries (base)
8557      Lisp_Object base;
8558 {
8559   Lisp_Object subsidiaries;
8560   int base_name_len = SBYTES (SYMBOL_NAME (base));
8561   char *buf = (char *) alloca (base_name_len + 6);
8562   int i;
8563
8564   bcopy (SDATA (SYMBOL_NAME (base)), buf, base_name_len);
8565   subsidiaries = Fmake_vector (make_number (3), Qnil);
8566   for (i = 0; i < 3; i++)
8567     {
8568       bcopy (suffixes[i], buf + base_name_len, strlen (suffixes[i]) + 1);
8569       ASET (subsidiaries, i, intern (buf));
8570     }
8571   return subsidiaries;
8572 }
8573
8574
8575 DEFUN ("define-coding-system-internal", Fdefine_coding_system_internal,
8576        Sdefine_coding_system_internal, coding_arg_max, MANY, 0,
8577        doc: /* For internal use only.
8578 usage: (define-coding-system-internal ...)  */)
8579      (nargs, args)
8580      int nargs;
8581      Lisp_Object *args;
8582 {
8583   Lisp_Object name;
8584   Lisp_Object spec_vec;         /* [ ATTRS ALIASE EOL_TYPE ] */
8585   Lisp_Object attrs;            /* Vector of attributes.  */
8586   Lisp_Object eol_type;
8587   Lisp_Object aliases;
8588   Lisp_Object coding_type, charset_list, safe_charsets;
8589   enum coding_category category;
8590   Lisp_Object tail, val;
8591   int max_charset_id = 0;
8592   int i;
8593
8594   if (nargs < coding_arg_max)
8595     goto short_args;
8596
8597   attrs = Fmake_vector (make_number (coding_attr_last_index), Qnil);
8598
8599   name = args[coding_arg_name];
8600   CHECK_SYMBOL (name);
8601   CODING_ATTR_BASE_NAME (attrs) = name;
8602
8603   val = args[coding_arg_mnemonic];
8604   if (! STRINGP (val))
8605     CHECK_CHARACTER (val);
8606   CODING_ATTR_MNEMONIC (attrs) = val;
8607
8608   coding_type = args[coding_arg_coding_type];
8609   CHECK_SYMBOL (coding_type);
8610   CODING_ATTR_TYPE (attrs) = coding_type;
8611
8612   charset_list = args[coding_arg_charset_list];
8613   if (SYMBOLP (charset_list))
8614     {
8615       if (EQ (charset_list, Qiso_2022))
8616         {
8617           if (! EQ (coding_type, Qiso_2022))
8618             error ("Invalid charset-list");
8619           charset_list = Viso_2022_charset_list;
8620         }
8621       else if (EQ (charset_list, Qemacs_mule))
8622         {
8623           if (! EQ (coding_type, Qemacs_mule))
8624             error ("Invalid charset-list");
8625           charset_list = Vemacs_mule_charset_list;
8626         }
8627       for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
8628         if (max_charset_id < XFASTINT (XCAR (tail)))
8629           max_charset_id = XFASTINT (XCAR (tail));
8630     }
8631   else
8632     {
8633       charset_list = Fcopy_sequence (charset_list);
8634       for (tail = charset_list; !NILP (tail); tail = Fcdr (tail))
8635         {
8636           struct charset *charset;
8637
8638           val = Fcar (tail);
8639           CHECK_CHARSET_GET_CHARSET (val, charset);
8640           if (EQ (coding_type, Qiso_2022)
8641               ? CHARSET_ISO_FINAL (charset) < 0
8642               : EQ (coding_type, Qemacs_mule)
8643               ? CHARSET_EMACS_MULE_ID (charset) < 0
8644               : 0)
8645             error ("Can't handle charset `%s'",
8646                    SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
8647
8648           XSETCAR (tail, make_number (charset->id));
8649           if (max_charset_id < charset->id)
8650             max_charset_id = charset->id;
8651         }
8652     }
8653   CODING_ATTR_CHARSET_LIST (attrs) = charset_list;
8654
8655   safe_charsets = Fmake_string (make_number (max_charset_id + 1),
8656                                 make_number (255));
8657   for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
8658     SSET (safe_charsets, XFASTINT (XCAR (tail)), 0);
8659   CODING_ATTR_SAFE_CHARSETS (attrs) = safe_charsets;
8660
8661   CODING_ATTR_ASCII_COMPAT (attrs) = args[coding_arg_ascii_compatible_p];
8662
8663   val = args[coding_arg_decode_translation_table];
8664   if (! CHAR_TABLE_P (val) && ! CONSP (val))
8665     CHECK_SYMBOL (val);
8666   CODING_ATTR_DECODE_TBL (attrs) = val;
8667
8668   val = args[coding_arg_encode_translation_table];
8669   if (! CHAR_TABLE_P (val) && ! CONSP (val))
8670     CHECK_SYMBOL (val);
8671   CODING_ATTR_ENCODE_TBL (attrs) = val;
8672
8673   val = args[coding_arg_post_read_conversion];
8674   CHECK_SYMBOL (val);
8675   CODING_ATTR_POST_READ (attrs) = val;
8676
8677   val = args[coding_arg_pre_write_conversion];
8678   CHECK_SYMBOL (val);
8679   CODING_ATTR_PRE_WRITE (attrs) = val;
8680
8681   val = args[coding_arg_default_char];
8682   if (NILP (val))
8683     CODING_ATTR_DEFAULT_CHAR (attrs) = make_number (' ');
8684   else
8685     {
8686       CHECK_CHARACTER (val);
8687       CODING_ATTR_DEFAULT_CHAR (attrs) = val;
8688     }
8689
8690   val = args[coding_arg_for_unibyte];
8691   CODING_ATTR_FOR_UNIBYTE (attrs) = NILP (val) ? Qnil : Qt;
8692
8693   val = args[coding_arg_plist];
8694   CHECK_LIST (val);
8695   CODING_ATTR_PLIST (attrs) = val;
8696
8697   if (EQ (coding_type, Qcharset))
8698     {
8699       /* Generate a lisp vector of 256 elements.  Each element is nil,
8700          integer, or a list of charset IDs.
8701
8702          If Nth element is nil, the byte code N is invalid in this
8703          coding system.
8704
8705          If Nth element is a number NUM, N is the first byte of a
8706          charset whose ID is NUM.
8707
8708          If Nth element is a list of charset IDs, N is the first byte
8709          of one of them.  The list is sorted by dimensions of the
8710          charsets.  A charset of smaller dimension comes firtst. */
8711       val = Fmake_vector (make_number (256), Qnil);
8712
8713       for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
8714         {
8715           struct charset *charset = CHARSET_FROM_ID (XFASTINT (XCAR (tail)));
8716           int dim = CHARSET_DIMENSION (charset);
8717           int idx = (dim - 1) * 4;
8718
8719           if (CHARSET_ASCII_COMPATIBLE_P (charset))
8720             CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
8721
8722           for (i = charset->code_space[idx];
8723                i <= charset->code_space[idx + 1]; i++)
8724             {
8725               Lisp_Object tmp, tmp2;
8726               int dim2;
8727
8728               tmp = AREF (val, i);
8729               if (NILP (tmp))
8730                 tmp = XCAR (tail);
8731               else if (NUMBERP (tmp))
8732                 {
8733                   dim2 = CHARSET_DIMENSION (CHARSET_FROM_ID (XFASTINT (tmp)));
8734                   if (dim < dim2)
8735                     tmp = Fcons (XCAR (tail), Fcons (tmp, Qnil));
8736                   else
8737                     tmp = Fcons (tmp, Fcons (XCAR (tail), Qnil));
8738                 }
8739               else
8740                 {
8741                   for (tmp2 = tmp; CONSP (tmp2); tmp2 = XCDR (tmp2))
8742                     {
8743                       dim2 = CHARSET_DIMENSION (CHARSET_FROM_ID (XFASTINT (XCAR (tmp2))));
8744                       if (dim < dim2)
8745                         break;
8746                     }
8747                   if (NILP (tmp2))
8748                     tmp = nconc2 (tmp, Fcons (XCAR (tail), Qnil));
8749                   else
8750                     {
8751                       XSETCDR (tmp2, Fcons (XCAR (tmp2), XCDR (tmp2)));
8752                       XSETCAR (tmp2, XCAR (tail));
8753                     }
8754                 }
8755               ASET (val, i, tmp);
8756             }
8757         }
8758       ASET (attrs, coding_attr_charset_valids, val);
8759       category = coding_category_charset;
8760     }
8761   else if (EQ (coding_type, Qccl))
8762     {
8763       Lisp_Object valids;
8764
8765       if (nargs < coding_arg_ccl_max)
8766         goto short_args;
8767
8768       val = args[coding_arg_ccl_decoder];
8769       CHECK_CCL_PROGRAM (val);
8770       if (VECTORP (val))
8771         val = Fcopy_sequence (val);
8772       ASET (attrs, coding_attr_ccl_decoder, val);
8773
8774       val = args[coding_arg_ccl_encoder];
8775       CHECK_CCL_PROGRAM (val);
8776       if (VECTORP (val))
8777         val = Fcopy_sequence (val);
8778       ASET (attrs, coding_attr_ccl_encoder, val);
8779
8780       val = args[coding_arg_ccl_valids];
8781       valids = Fmake_string (make_number (256), make_number (0));
8782       for (tail = val; !NILP (tail); tail = Fcdr (tail))
8783         {
8784           int from, to;
8785
8786           val = Fcar (tail);
8787           if (INTEGERP (val))
8788             {
8789               from = to = XINT (val);
8790               if (from < 0 || from > 255)
8791                 args_out_of_range_3 (val, make_number (0), make_number (255));
8792             }
8793           else
8794             {
8795               CHECK_CONS (val);
8796               CHECK_NATNUM_CAR (val);
8797               CHECK_NATNUM_CDR (val);
8798               from = XINT (XCAR (val));
8799               if (from > 255)
8800                 args_out_of_range_3 (XCAR (val),
8801                                      make_number (0), make_number (255));
8802               to = XINT (XCDR (val));
8803               if (to < from || to > 255)
8804                 args_out_of_range_3 (XCDR (val),
8805                                      XCAR (val), make_number (255));
8806             }
8807           for (i = from; i <= to; i++)
8808             SSET (valids, i, 1);
8809         }
8810       ASET (attrs, coding_attr_ccl_valids, valids);
8811
8812       category = coding_category_ccl;
8813     }
8814   else if (EQ (coding_type, Qutf_16))
8815     {
8816       Lisp_Object bom, endian;
8817
8818       CODING_ATTR_ASCII_COMPAT (attrs) = Qnil;
8819
8820       if (nargs < coding_arg_utf16_max)
8821         goto short_args;
8822
8823       bom = args[coding_arg_utf16_bom];
8824       if (! NILP (bom) && ! EQ (bom, Qt))
8825         {
8826           CHECK_CONS (bom);
8827           val = XCAR (bom);
8828           CHECK_CODING_SYSTEM (val);
8829           val = XCDR (bom);
8830           CHECK_CODING_SYSTEM (val);
8831         }
8832       ASET (attrs, coding_attr_utf_16_bom, bom);
8833
8834       endian = args[coding_arg_utf16_endian];
8835       CHECK_SYMBOL (endian);
8836       if (NILP (endian))
8837         endian = Qbig;
8838       else if (! EQ (endian, Qbig) && ! EQ (endian, Qlittle))
8839         error ("Invalid endian: %s", SDATA (SYMBOL_NAME (endian)));
8840       ASET (attrs, coding_attr_utf_16_endian, endian);
8841
8842       category = (CONSP (bom)
8843                   ? coding_category_utf_16_auto
8844                   : NILP (bom)
8845                   ? (EQ (endian, Qbig)
8846                      ? coding_category_utf_16_be_nosig
8847                      : coding_category_utf_16_le_nosig)
8848                   : (EQ (endian, Qbig)
8849                      ? coding_category_utf_16_be
8850                      : coding_category_utf_16_le));
8851     }
8852   else if (EQ (coding_type, Qiso_2022))
8853     {
8854       Lisp_Object initial, reg_usage, request, flags;
8855       int i;
8856
8857       if (nargs < coding_arg_iso2022_max)
8858         goto short_args;
8859
8860       initial = Fcopy_sequence (args[coding_arg_iso2022_initial]);
8861       CHECK_VECTOR (initial);
8862       for (i = 0; i < 4; i++)
8863         {
8864           val = Faref (initial, make_number (i));
8865           if (! NILP (val))
8866             {
8867               struct charset *charset;
8868
8869               CHECK_CHARSET_GET_CHARSET (val, charset);
8870               ASET (initial, i, make_number (CHARSET_ID (charset)));
8871               if (i == 0 && CHARSET_ASCII_COMPATIBLE_P (charset))
8872                 CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
8873             }
8874           else
8875             ASET (initial, i, make_number (-1));
8876         }
8877
8878       reg_usage = args[coding_arg_iso2022_reg_usage];
8879       CHECK_CONS (reg_usage);
8880       CHECK_NUMBER_CAR (reg_usage);
8881       CHECK_NUMBER_CDR (reg_usage);
8882
8883       request = Fcopy_sequence (args[coding_arg_iso2022_request]);
8884       for (tail = request; ! NILP (tail); tail = Fcdr (tail))
8885         {
8886           int id;
8887           Lisp_Object tmp;
8888
8889           val = Fcar (tail);
8890           CHECK_CONS (val);
8891           tmp = XCAR (val);
8892           CHECK_CHARSET_GET_ID (tmp, id);
8893           CHECK_NATNUM_CDR (val);
8894           if (XINT (XCDR (val)) >= 4)
8895             error ("Invalid graphic register number: %d", XINT (XCDR (val)));
8896           XSETCAR (val, make_number (id));
8897         }
8898
8899       flags = args[coding_arg_iso2022_flags];
8900       CHECK_NATNUM (flags);
8901       i = XINT (flags);
8902       if (EQ (args[coding_arg_charset_list], Qiso_2022))
8903         flags = make_number (i | CODING_ISO_FLAG_FULL_SUPPORT);
8904
8905       ASET (attrs, coding_attr_iso_initial, initial);
8906       ASET (attrs, coding_attr_iso_usage, reg_usage);
8907       ASET (attrs, coding_attr_iso_request, request);
8908       ASET (attrs, coding_attr_iso_flags, flags);
8909       setup_iso_safe_charsets (attrs);
8910
8911       if (i & CODING_ISO_FLAG_SEVEN_BITS)
8912         category = ((i & (CODING_ISO_FLAG_LOCKING_SHIFT
8913                           | CODING_ISO_FLAG_SINGLE_SHIFT))
8914                     ? coding_category_iso_7_else
8915                     : EQ (args[coding_arg_charset_list], Qiso_2022)
8916                     ? coding_category_iso_7
8917                     : coding_category_iso_7_tight);
8918       else
8919         {
8920           int id = XINT (AREF (initial, 1));
8921
8922           category = (((i & CODING_ISO_FLAG_LOCKING_SHIFT)
8923                        || EQ (args[coding_arg_charset_list], Qiso_2022)
8924                        || id < 0)
8925                       ? coding_category_iso_8_else
8926                       : (CHARSET_DIMENSION (CHARSET_FROM_ID (id)) == 1)
8927                       ? coding_category_iso_8_1
8928                       : coding_category_iso_8_2);
8929         }
8930       if (category != coding_category_iso_8_1
8931           && category != coding_category_iso_8_2)
8932         CODING_ATTR_ASCII_COMPAT (attrs) = Qnil;
8933     }
8934   else if (EQ (coding_type, Qemacs_mule))
8935     {
8936       if (EQ (args[coding_arg_charset_list], Qemacs_mule))
8937         ASET (attrs, coding_attr_emacs_mule_full, Qt);
8938       CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
8939       category = coding_category_emacs_mule;
8940     }
8941   else if (EQ (coding_type, Qshift_jis))
8942     {
8943
8944       struct charset *charset;
8945
8946       if (XINT (Flength (charset_list)) != 3
8947           && XINT (Flength (charset_list)) != 4)
8948         error ("There should be three or four charsets");
8949
8950       charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
8951       if (CHARSET_DIMENSION (charset) != 1)
8952         error ("Dimension of charset %s is not one",
8953                SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
8954       if (CHARSET_ASCII_COMPATIBLE_P (charset))
8955         CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
8956
8957       charset_list = XCDR (charset_list);
8958       charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
8959       if (CHARSET_DIMENSION (charset) != 1)
8960         error ("Dimension of charset %s is not one",
8961                SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
8962
8963       charset_list = XCDR (charset_list);
8964       charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
8965       if (CHARSET_DIMENSION (charset) != 2)
8966         error ("Dimension of charset %s is not two",
8967                SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
8968
8969       charset_list = XCDR (charset_list);
8970       if (! NILP (charset_list))
8971         {
8972           charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
8973           if (CHARSET_DIMENSION (charset) != 2)
8974             error ("Dimension of charset %s is not two",
8975                    SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
8976         }
8977
8978       category = coding_category_sjis;
8979       Vsjis_coding_system = name;
8980     }
8981   else if (EQ (coding_type, Qbig5))
8982     {
8983       struct charset *charset;
8984
8985       if (XINT (Flength (charset_list)) != 2)
8986         error ("There should be just two charsets");
8987
8988       charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
8989       if (CHARSET_DIMENSION (charset) != 1)
8990         error ("Dimension of charset %s is not one",
8991                SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
8992       if (CHARSET_ASCII_COMPATIBLE_P (charset))
8993         CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
8994
8995       charset_list = XCDR (charset_list);
8996       charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
8997       if (CHARSET_DIMENSION (charset) != 2)
8998         error ("Dimension of charset %s is not two",
8999                SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
9000
9001       category = coding_category_big5;
9002       Vbig5_coding_system = name;
9003     }
9004   else if (EQ (coding_type, Qraw_text))
9005     {
9006       category = coding_category_raw_text;
9007       CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
9008     }
9009   else if (EQ (coding_type, Qutf_8))
9010     {
9011       category = coding_category_utf_8;
9012       CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
9013     }
9014   else if (EQ (coding_type, Qundecided))
9015     category = coding_category_undecided;
9016   else
9017     error ("Invalid coding system type: %s",
9018            SDATA (SYMBOL_NAME (coding_type)));
9019
9020   CODING_ATTR_CATEGORY (attrs) = make_number (category);
9021   CODING_ATTR_PLIST (attrs)
9022     = Fcons (QCcategory, Fcons (AREF (Vcoding_category_table, category),
9023                                 CODING_ATTR_PLIST (attrs)));
9024   CODING_ATTR_PLIST (attrs)
9025     = Fcons (QCascii_compatible_p,
9026              Fcons (CODING_ATTR_ASCII_COMPAT (attrs),
9027                     CODING_ATTR_PLIST (attrs)));
9028
9029   eol_type = args[coding_arg_eol_type];
9030   if (! NILP (eol_type)
9031       && ! EQ (eol_type, Qunix)
9032       && ! EQ (eol_type, Qdos)
9033       && ! EQ (eol_type, Qmac))
9034     error ("Invalid eol-type");
9035
9036   aliases = Fcons (name, Qnil);
9037
9038   if (NILP (eol_type))
9039     {
9040       eol_type = make_subsidiaries (name);
9041       for (i = 0; i < 3; i++)
9042         {
9043           Lisp_Object this_spec, this_name, this_aliases, this_eol_type;
9044
9045           this_name = AREF (eol_type, i);
9046           this_aliases = Fcons (this_name, Qnil);
9047           this_eol_type = (i == 0 ? Qunix : i == 1 ? Qdos : Qmac);
9048           this_spec = Fmake_vector (make_number (3), attrs);
9049           ASET (this_spec, 1, this_aliases);
9050           ASET (this_spec, 2, this_eol_type);
9051           Fputhash (this_name, this_spec, Vcoding_system_hash_table);
9052           Vcoding_system_list = Fcons (this_name, Vcoding_system_list);
9053           val = Fassoc (Fsymbol_name (this_name), Vcoding_system_alist);
9054           if (NILP (val))
9055             Vcoding_system_alist
9056               = Fcons (Fcons (Fsymbol_name (this_name), Qnil),
9057                        Vcoding_system_alist);
9058         }
9059     }
9060
9061   spec_vec = Fmake_vector (make_number (3), attrs);
9062   ASET (spec_vec, 1, aliases);
9063   ASET (spec_vec, 2, eol_type);
9064
9065   Fputhash (name, spec_vec, Vcoding_system_hash_table);
9066   Vcoding_system_list = Fcons (name, Vcoding_system_list);
9067   val = Fassoc (Fsymbol_name (name), Vcoding_system_alist);
9068   if (NILP (val))
9069     Vcoding_system_alist = Fcons (Fcons (Fsymbol_name (name), Qnil),
9070                                   Vcoding_system_alist);
9071
9072   {
9073     int id = coding_categories[category].id;
9074
9075     if (id < 0 || EQ (name, CODING_ID_NAME (id)))
9076       setup_coding_system (name, &coding_categories[category]);
9077   }
9078
9079   return Qnil;
9080
9081  short_args:
9082   return Fsignal (Qwrong_number_of_arguments,
9083                   Fcons (intern ("define-coding-system-internal"),
9084                          make_number (nargs)));
9085 }
9086
9087
9088 DEFUN ("coding-system-put", Fcoding_system_put, Scoding_system_put,
9089        3, 3, 0,
9090        doc: /* Change value in CODING-SYSTEM's property list PROP to VAL.  */)
9091   (coding_system, prop, val)
9092      Lisp_Object coding_system, prop, val;
9093 {
9094   Lisp_Object spec, attrs;
9095
9096   CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
9097   attrs = AREF (spec, 0);
9098   if (EQ (prop, QCmnemonic))
9099     {
9100       if (! STRINGP (val))
9101         CHECK_CHARACTER (val);
9102       CODING_ATTR_MNEMONIC (attrs) = val;
9103     }
9104   else if (EQ (prop, QCdefalut_char))
9105     {
9106       if (NILP (val))
9107         val = make_number (' ');
9108       else
9109         CHECK_CHARACTER (val);
9110       CODING_ATTR_DEFAULT_CHAR (attrs) = val;
9111     }
9112   else if (EQ (prop, QCdecode_translation_table))
9113     {
9114       if (! CHAR_TABLE_P (val) && ! CONSP (val))
9115         CHECK_SYMBOL (val);
9116       CODING_ATTR_DECODE_TBL (attrs) = val;
9117     }
9118   else if (EQ (prop, QCencode_translation_table))
9119     {
9120       if (! CHAR_TABLE_P (val) && ! CONSP (val))
9121         CHECK_SYMBOL (val);
9122       CODING_ATTR_ENCODE_TBL (attrs) = val;
9123     }
9124   else if (EQ (prop, QCpost_read_conversion))
9125     {
9126       CHECK_SYMBOL (val);
9127       CODING_ATTR_POST_READ (attrs) = val;
9128     }
9129   else if (EQ (prop, QCpre_write_conversion))
9130     {
9131       CHECK_SYMBOL (val);
9132       CODING_ATTR_PRE_WRITE (attrs) = val;
9133     }
9134   else if (EQ (prop, QCascii_compatible_p))
9135     {
9136       CODING_ATTR_ASCII_COMPAT (attrs) = val;
9137     }
9138
9139   CODING_ATTR_PLIST (attrs)
9140     = Fplist_put (CODING_ATTR_PLIST (attrs), prop, val);
9141   return val;
9142 }
9143
9144
9145 DEFUN ("define-coding-system-alias", Fdefine_coding_system_alias,
9146        Sdefine_coding_system_alias, 2, 2, 0,
9147        doc: /* Define ALIAS as an alias for CODING-SYSTEM.  */)
9148      (alias, coding_system)
9149      Lisp_Object alias, coding_system;
9150 {
9151   Lisp_Object spec, aliases, eol_type, val;
9152
9153   CHECK_SYMBOL (alias);
9154   CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
9155   aliases = AREF (spec, 1);
9156   /* ALISES should be a list of length more than zero, and the first
9157      element is a base coding system.  Append ALIAS at the tail of the
9158      list.  */
9159   while (!NILP (XCDR (aliases)))
9160     aliases = XCDR (aliases);
9161   XSETCDR (aliases, Fcons (alias, Qnil));
9162
9163   eol_type = AREF (spec, 2);
9164   if (VECTORP (eol_type))
9165     {
9166       Lisp_Object subsidiaries;
9167       int i;
9168
9169       subsidiaries = make_subsidiaries (alias);
9170       for (i = 0; i < 3; i++)
9171         Fdefine_coding_system_alias (AREF (subsidiaries, i),
9172                                      AREF (eol_type, i));
9173     }
9174
9175   Fputhash (alias, spec, Vcoding_system_hash_table);
9176   Vcoding_system_list = Fcons (alias, Vcoding_system_list);
9177   val = Fassoc (Fsymbol_name (alias), Vcoding_system_alist);
9178   if (NILP (val))
9179     Vcoding_system_alist = Fcons (Fcons (Fsymbol_name (alias), Qnil),
9180                                   Vcoding_system_alist);
9181
9182   return Qnil;
9183 }
9184
9185 DEFUN ("coding-system-base", Fcoding_system_base, Scoding_system_base,
9186        1, 1, 0,
9187        doc: /* Return the base of CODING-SYSTEM.
9188 Any alias or subsidiary coding system is not a base coding system.  */)
9189   (coding_system)
9190      Lisp_Object coding_system;
9191 {
9192   Lisp_Object spec, attrs;
9193
9194   if (NILP (coding_system))
9195     return (Qno_conversion);
9196   CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
9197   attrs = AREF (spec, 0);
9198   return CODING_ATTR_BASE_NAME (attrs);
9199 }
9200
9201 DEFUN ("coding-system-plist", Fcoding_system_plist, Scoding_system_plist,
9202        1, 1, 0,
9203        doc: "Return the property list of CODING-SYSTEM.")
9204      (coding_system)
9205      Lisp_Object coding_system;
9206 {
9207   Lisp_Object spec, attrs;
9208
9209   if (NILP (coding_system))
9210     coding_system = Qno_conversion;
9211   CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
9212   attrs = AREF (spec, 0);
9213   return CODING_ATTR_PLIST (attrs);
9214 }
9215
9216
9217 DEFUN ("coding-system-aliases", Fcoding_system_aliases, Scoding_system_aliases,
9218        1, 1, 0,
9219        doc: /* Return the list of aliases of CODING-SYSTEM.  */)
9220      (coding_system)
9221      Lisp_Object coding_system;
9222 {
9223   Lisp_Object spec;
9224
9225   if (NILP (coding_system))
9226     coding_system = Qno_conversion;
9227   CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
9228   return AREF (spec, 1);
9229 }
9230
9231 DEFUN ("coding-system-eol-type", Fcoding_system_eol_type,
9232        Scoding_system_eol_type, 1, 1, 0,
9233        doc: /* Return eol-type of CODING-SYSTEM.
9234 An eol-type is integer 0, 1, 2, or a vector of coding systems.
9235
9236 Integer values 0, 1, and 2 indicate a format of end-of-line; LF, CRLF,
9237 and CR respectively.
9238
9239 A vector value indicates that a format of end-of-line should be
9240 detected automatically.  Nth element of the vector is the subsidiary
9241 coding system whose eol-type is N.  */)
9242      (coding_system)
9243      Lisp_Object coding_system;
9244 {
9245   Lisp_Object spec, eol_type;
9246   int n;
9247
9248   if (NILP (coding_system))
9249     coding_system = Qno_conversion;
9250   if (! CODING_SYSTEM_P (coding_system))
9251     return Qnil;
9252   spec = CODING_SYSTEM_SPEC (coding_system);
9253   eol_type = AREF (spec, 2);
9254   if (VECTORP (eol_type))
9255     return Fcopy_sequence (eol_type);
9256   n = EQ (eol_type, Qunix) ? 0 : EQ (eol_type, Qdos) ? 1 : 2;
9257   return make_number (n);
9258 }
9259
9260 #endif /* emacs */
9261
9262 \f
9263 /*** 9. Post-amble ***/
9264
9265 void
9266 init_coding_once ()
9267 {
9268   int i;
9269
9270   for (i = 0; i < coding_category_max; i++)
9271     {
9272       coding_categories[i].id = -1;
9273       coding_priorities[i] = i;
9274     }
9275
9276   /* ISO2022 specific initialize routine.  */
9277   for (i = 0; i < 0x20; i++)
9278     iso_code_class[i] = ISO_control_0;
9279   for (i = 0x21; i < 0x7F; i++)
9280     iso_code_class[i] = ISO_graphic_plane_0;
9281   for (i = 0x80; i < 0xA0; i++)
9282     iso_code_class[i] = ISO_control_1;
9283   for (i = 0xA1; i < 0xFF; i++)
9284     iso_code_class[i] = ISO_graphic_plane_1;
9285   iso_code_class[0x20] = iso_code_class[0x7F] = ISO_0x20_or_0x7F;
9286   iso_code_class[0xA0] = iso_code_class[0xFF] = ISO_0xA0_or_0xFF;
9287   iso_code_class[ISO_CODE_SO] = ISO_shift_out;
9288   iso_code_class[ISO_CODE_SI] = ISO_shift_in;
9289   iso_code_class[ISO_CODE_SS2_7] = ISO_single_shift_2_7;
9290   iso_code_class[ISO_CODE_ESC] = ISO_escape;
9291   iso_code_class[ISO_CODE_SS2] = ISO_single_shift_2;
9292   iso_code_class[ISO_CODE_SS3] = ISO_single_shift_3;
9293   iso_code_class[ISO_CODE_CSI] = ISO_control_sequence_introducer;
9294
9295   for (i = 0; i < 256; i++)
9296     {
9297       emacs_mule_bytes[i] = 1;
9298     }
9299   emacs_mule_bytes[EMACS_MULE_LEADING_CODE_PRIVATE_11] = 3;
9300   emacs_mule_bytes[EMACS_MULE_LEADING_CODE_PRIVATE_12] = 3;
9301   emacs_mule_bytes[EMACS_MULE_LEADING_CODE_PRIVATE_21] = 4;
9302   emacs_mule_bytes[EMACS_MULE_LEADING_CODE_PRIVATE_22] = 4;
9303 }
9304
9305 #ifdef emacs
9306
9307 void
9308 syms_of_coding ()
9309 {
9310   staticpro (&Vcoding_system_hash_table);
9311   {
9312     Lisp_Object args[2];
9313     args[0] = QCtest;
9314     args[1] = Qeq;
9315     Vcoding_system_hash_table = Fmake_hash_table (2, args);
9316   }
9317
9318   staticpro (&Vsjis_coding_system);
9319   Vsjis_coding_system = Qnil;
9320
9321   staticpro (&Vbig5_coding_system);
9322   Vbig5_coding_system = Qnil;
9323
9324   staticpro (&Vcode_conversion_reused_workbuf);
9325   Vcode_conversion_reused_workbuf = Qnil;
9326
9327   staticpro (&Vcode_conversion_workbuf_name);
9328   Vcode_conversion_workbuf_name = build_string (" *code-conversion-work*");
9329
9330   reused_workbuf_in_use = 0;
9331
9332   DEFSYM (Qcharset, "charset");
9333   DEFSYM (Qtarget_idx, "target-idx");
9334   DEFSYM (Qcoding_system_history, "coding-system-history");
9335   Fset (Qcoding_system_history, Qnil);
9336
9337   /* Target FILENAME is the first argument.  */
9338   Fput (Qinsert_file_contents, Qtarget_idx, make_number (0));
9339   /* Target FILENAME is the third argument.  */
9340   Fput (Qwrite_region, Qtarget_idx, make_number (2));
9341
9342   DEFSYM (Qcall_process, "call-process");
9343   /* Target PROGRAM is the first argument.  */
9344   Fput (Qcall_process, Qtarget_idx, make_number (0));
9345
9346   DEFSYM (Qcall_process_region, "call-process-region");
9347   /* Target PROGRAM is the third argument.  */
9348   Fput (Qcall_process_region, Qtarget_idx, make_number (2));
9349
9350   DEFSYM (Qstart_process, "start-process");
9351   /* Target PROGRAM is the third argument.  */
9352   Fput (Qstart_process, Qtarget_idx, make_number (2));
9353
9354   DEFSYM (Qopen_network_stream, "open-network-stream");
9355   /* Target SERVICE is the fourth argument.  */
9356   Fput (Qopen_network_stream, Qtarget_idx, make_number (3));
9357
9358   DEFSYM (Qcoding_system, "coding-system");
9359   DEFSYM (Qcoding_aliases, "coding-aliases");
9360
9361   DEFSYM (Qeol_type, "eol-type");
9362   DEFSYM (Qunix, "unix");
9363   DEFSYM (Qdos, "dos");
9364
9365   DEFSYM (Qbuffer_file_coding_system, "buffer-file-coding-system");
9366   DEFSYM (Qpost_read_conversion, "post-read-conversion");
9367   DEFSYM (Qpre_write_conversion, "pre-write-conversion");
9368   DEFSYM (Qdefault_char, "default-char");
9369   DEFSYM (Qundecided, "undecided");
9370   DEFSYM (Qno_conversion, "no-conversion");
9371   DEFSYM (Qraw_text, "raw-text");
9372
9373   DEFSYM (Qiso_2022, "iso-2022");
9374
9375   DEFSYM (Qutf_8, "utf-8");
9376   DEFSYM (Qutf_8_emacs, "utf-8-emacs");
9377
9378   DEFSYM (Qutf_16, "utf-16");
9379   DEFSYM (Qbig, "big");
9380   DEFSYM (Qlittle, "little");
9381
9382   DEFSYM (Qshift_jis, "shift-jis");
9383   DEFSYM (Qbig5, "big5");
9384
9385   DEFSYM (Qcoding_system_p, "coding-system-p");
9386
9387   DEFSYM (Qcoding_system_error, "coding-system-error");
9388   Fput (Qcoding_system_error, Qerror_conditions,
9389         Fcons (Qcoding_system_error, Fcons (Qerror, Qnil)));
9390   Fput (Qcoding_system_error, Qerror_message,
9391         build_string ("Invalid coding system"));
9392
9393   /* Intern this now in case it isn't already done.
9394      Setting this variable twice is harmless.
9395      But don't staticpro it here--that is done in alloc.c.  */
9396   Qchar_table_extra_slots = intern ("char-table-extra-slots");
9397
9398   DEFSYM (Qtranslation_table, "translation-table");
9399   Fput (Qtranslation_table, Qchar_table_extra_slots, make_number (2));
9400   DEFSYM (Qtranslation_table_id, "translation-table-id");
9401   DEFSYM (Qtranslation_table_for_decode, "translation-table-for-decode");
9402   DEFSYM (Qtranslation_table_for_encode, "translation-table-for-encode");
9403
9404   DEFSYM (Qvalid_codes, "valid-codes");
9405
9406   DEFSYM (Qemacs_mule, "emacs-mule");
9407
9408   DEFSYM (QCcategory, ":category");
9409   DEFSYM (QCmnemonic, ":mnemonic");
9410   DEFSYM (QCdefalut_char, ":default-char");
9411   DEFSYM (QCdecode_translation_table, ":decode-translation-table");
9412   DEFSYM (QCencode_translation_table, ":encode-translation-table");
9413   DEFSYM (QCpost_read_conversion, ":post-read-conversion");
9414   DEFSYM (QCpre_write_conversion, ":pre-write-conversion");
9415   DEFSYM (QCascii_compatible_p, ":ascii-compatible-p");
9416
9417   Vcoding_category_table
9418     = Fmake_vector (make_number (coding_category_max), Qnil);
9419   staticpro (&Vcoding_category_table);
9420   /* Followings are target of code detection.  */
9421   ASET (Vcoding_category_table, coding_category_iso_7,
9422         intern ("coding-category-iso-7"));
9423   ASET (Vcoding_category_table, coding_category_iso_7_tight,
9424         intern ("coding-category-iso-7-tight"));
9425   ASET (Vcoding_category_table, coding_category_iso_8_1,
9426         intern ("coding-category-iso-8-1"));
9427   ASET (Vcoding_category_table, coding_category_iso_8_2,
9428         intern ("coding-category-iso-8-2"));
9429   ASET (Vcoding_category_table, coding_category_iso_7_else,
9430         intern ("coding-category-iso-7-else"));
9431   ASET (Vcoding_category_table, coding_category_iso_8_else,
9432         intern ("coding-category-iso-8-else"));
9433   ASET (Vcoding_category_table, coding_category_utf_8,
9434         intern ("coding-category-utf-8"));
9435   ASET (Vcoding_category_table, coding_category_utf_16_be,
9436         intern ("coding-category-utf-16-be"));
9437   ASET (Vcoding_category_table, coding_category_utf_16_auto,
9438         intern ("coding-category-utf-16-auto"));
9439   ASET (Vcoding_category_table, coding_category_utf_16_le,
9440         intern ("coding-category-utf-16-le"));
9441   ASET (Vcoding_category_table, coding_category_utf_16_be_nosig,
9442         intern ("coding-category-utf-16-be-nosig"));
9443   ASET (Vcoding_category_table, coding_category_utf_16_le_nosig,
9444         intern ("coding-category-utf-16-le-nosig"));
9445   ASET (Vcoding_category_table, coding_category_charset,
9446         intern ("coding-category-charset"));
9447   ASET (Vcoding_category_table, coding_category_sjis,
9448         intern ("coding-category-sjis"));
9449   ASET (Vcoding_category_table, coding_category_big5,
9450         intern ("coding-category-big5"));
9451   ASET (Vcoding_category_table, coding_category_ccl,
9452         intern ("coding-category-ccl"));
9453   ASET (Vcoding_category_table, coding_category_emacs_mule,
9454         intern ("coding-category-emacs-mule"));
9455   /* Followings are NOT target of code detection.  */
9456   ASET (Vcoding_category_table, coding_category_raw_text,
9457         intern ("coding-category-raw-text"));
9458   ASET (Vcoding_category_table, coding_category_undecided,
9459         intern ("coding-category-undecided"));
9460
9461   DEFSYM (Qinsufficient_source, "insufficient-source");
9462   DEFSYM (Qinconsistent_eol, "inconsistent-eol");
9463   DEFSYM (Qinvalid_source, "invalid-source");
9464   DEFSYM (Qinterrupted, "interrupted");
9465   DEFSYM (Qinsufficient_memory, "insufficient-memory");
9466   DEFSYM (Qcoding_system_define_form, "coding-system-define-form");
9467
9468   defsubr (&Scoding_system_p);
9469   defsubr (&Sread_coding_system);
9470   defsubr (&Sread_non_nil_coding_system);
9471   defsubr (&Scheck_coding_system);
9472   defsubr (&Sdetect_coding_region);
9473   defsubr (&Sdetect_coding_string);
9474   defsubr (&Sfind_coding_systems_region_internal);
9475   defsubr (&Sunencodable_char_position);
9476   defsubr (&Scheck_coding_systems_region);
9477   defsubr (&Sdecode_coding_region);
9478   defsubr (&Sencode_coding_region);
9479   defsubr (&Sdecode_coding_string);
9480   defsubr (&Sencode_coding_string);
9481   defsubr (&Sdecode_sjis_char);
9482   defsubr (&Sencode_sjis_char);
9483   defsubr (&Sdecode_big5_char);
9484   defsubr (&Sencode_big5_char);
9485   defsubr (&Sset_terminal_coding_system_internal);
9486   defsubr (&Sset_safe_terminal_coding_system_internal);
9487   defsubr (&Sterminal_coding_system);
9488   defsubr (&Sset_keyboard_coding_system_internal);
9489   defsubr (&Skeyboard_coding_system);
9490   defsubr (&Sfind_operation_coding_system);
9491   defsubr (&Sset_coding_system_priority);
9492   defsubr (&Sdefine_coding_system_internal);
9493   defsubr (&Sdefine_coding_system_alias);
9494   defsubr (&Scoding_system_put);
9495   defsubr (&Scoding_system_base);
9496   defsubr (&Scoding_system_plist);
9497   defsubr (&Scoding_system_aliases);
9498   defsubr (&Scoding_system_eol_type);
9499   defsubr (&Scoding_system_priority_list);
9500
9501   DEFVAR_LISP ("coding-system-list", &Vcoding_system_list,
9502                doc: /* List of coding systems.
9503
9504 Do not alter the value of this variable manually.  This variable should be
9505 updated by the functions `define-coding-system' and
9506 `define-coding-system-alias'.  */);
9507   Vcoding_system_list = Qnil;
9508
9509   DEFVAR_LISP ("coding-system-alist", &Vcoding_system_alist,
9510                doc: /* Alist of coding system names.
9511 Each element is one element list of coding system name.
9512 This variable is given to `completing-read' as TABLE argument.
9513
9514 Do not alter the value of this variable manually.  This variable should be
9515 updated by the functions `make-coding-system' and
9516 `define-coding-system-alias'.  */);
9517   Vcoding_system_alist = Qnil;
9518
9519   DEFVAR_LISP ("coding-category-list", &Vcoding_category_list,
9520                doc: /* List of coding-categories (symbols) ordered by priority.
9521
9522 On detecting a coding system, Emacs tries code detection algorithms
9523 associated with each coding-category one by one in this order.  When
9524 one algorithm agrees with a byte sequence of source text, the coding
9525 system bound to the corresponding coding-category is selected.
9526
9527 Don't modify this variable directly, but use `set-coding-priority'.  */);
9528   {
9529     int i;
9530
9531     Vcoding_category_list = Qnil;
9532     for (i = coding_category_max - 1; i >= 0; i--)
9533       Vcoding_category_list
9534         = Fcons (XVECTOR (Vcoding_category_table)->contents[i],
9535                  Vcoding_category_list);
9536   }
9537
9538   DEFVAR_LISP ("coding-system-for-read", &Vcoding_system_for_read,
9539                doc: /* Specify the coding system for read operations.
9540 It is useful to bind this variable with `let', but do not set it globally.
9541 If the value is a coding system, it is used for decoding on read operation.
9542 If not, an appropriate element is used from one of the coding system alists:
9543 There are three such tables, `file-coding-system-alist',
9544 `process-coding-system-alist', and `network-coding-system-alist'.  */);
9545   Vcoding_system_for_read = Qnil;
9546
9547   DEFVAR_LISP ("coding-system-for-write", &Vcoding_system_for_write,
9548                doc: /* Specify the coding system for write operations.
9549 Programs bind this variable with `let', but you should not set it globally.
9550 If the value is a coding system, it is used for encoding of output,
9551 when writing it to a file and when sending it to a file or subprocess.
9552
9553 If this does not specify a coding system, an appropriate element
9554 is used from one of the coding system alists:
9555 There are three such tables, `file-coding-system-alist',
9556 `process-coding-system-alist', and `network-coding-system-alist'.
9557 For output to files, if the above procedure does not specify a coding system,
9558 the value of `buffer-file-coding-system' is used.  */);
9559   Vcoding_system_for_write = Qnil;
9560
9561   DEFVAR_LISP ("last-coding-system-used", &Vlast_coding_system_used,
9562                doc: /*
9563 Coding system used in the latest file or process I/O.  */);
9564   Vlast_coding_system_used = Qnil;
9565
9566   DEFVAR_LISP ("last-code-conversion-error", &Vlast_code_conversion_error,
9567                doc: /*
9568 Error status of the last code conversion.
9569
9570 When an error was detected in the last code conversion, this variable
9571 is set to one of the following symbols.
9572   `insufficient-source'
9573   `inconsistent-eol'
9574   `invalid-source'
9575   `interrupted'
9576   `insufficient-memory'
9577 When no error was detected, the value doesn't change.  So, to check
9578 the error status of a code conversion by this variable, you must
9579 explicitly set this variable to nil before performing code
9580 conversion.  */);
9581   Vlast_code_conversion_error = Qnil;
9582
9583   DEFVAR_BOOL ("inhibit-eol-conversion", &inhibit_eol_conversion,
9584                doc: /*
9585 *Non-nil means always inhibit code conversion of end-of-line format.
9586 See info node `Coding Systems' and info node `Text and Binary' concerning
9587 such conversion.  */);
9588   inhibit_eol_conversion = 0;
9589
9590   DEFVAR_BOOL ("inherit-process-coding-system", &inherit_process_coding_system,
9591                doc: /*
9592 Non-nil means process buffer inherits coding system of process output.
9593 Bind it to t if the process output is to be treated as if it were a file
9594 read from some filesystem.  */);
9595   inherit_process_coding_system = 0;
9596
9597   DEFVAR_LISP ("file-coding-system-alist", &Vfile_coding_system_alist,
9598                doc: /*
9599 Alist to decide a coding system to use for a file I/O operation.
9600 The format is ((PATTERN . VAL) ...),
9601 where PATTERN is a regular expression matching a file name,
9602 VAL is a coding system, a cons of coding systems, or a function symbol.
9603 If VAL is a coding system, it is used for both decoding and encoding
9604 the file contents.
9605 If VAL is a cons of coding systems, the car part is used for decoding,
9606 and the cdr part is used for encoding.
9607 If VAL is a function symbol, the function must return a coding system
9608 or a cons of coding systems which are used as above.  The function gets
9609 the arguments with which `find-operation-coding-systems' was called.
9610
9611 See also the function `find-operation-coding-system'
9612 and the variable `auto-coding-alist'.  */);
9613   Vfile_coding_system_alist = Qnil;
9614
9615   DEFVAR_LISP ("process-coding-system-alist", &Vprocess_coding_system_alist,
9616                doc: /*
9617 Alist to decide a coding system to use for a process I/O operation.
9618 The format is ((PATTERN . VAL) ...),
9619 where PATTERN is a regular expression matching a program name,
9620 VAL is a coding system, a cons of coding systems, or a function symbol.
9621 If VAL is a coding system, it is used for both decoding what received
9622 from the program and encoding what sent to the program.
9623 If VAL is a cons of coding systems, the car part is used for decoding,
9624 and the cdr part is used for encoding.
9625 If VAL is a function symbol, the function must return a coding system
9626 or a cons of coding systems which are used as above.
9627
9628 See also the function `find-operation-coding-system'.  */);
9629   Vprocess_coding_system_alist = Qnil;
9630
9631   DEFVAR_LISP ("network-coding-system-alist", &Vnetwork_coding_system_alist,
9632                doc: /*
9633 Alist to decide a coding system to use for a network I/O operation.
9634 The format is ((PATTERN . VAL) ...),
9635 where PATTERN is a regular expression matching a network service name
9636 or is a port number to connect to,
9637 VAL is a coding system, a cons of coding systems, or a function symbol.
9638 If VAL is a coding system, it is used for both decoding what received
9639 from the network stream and encoding what sent to the network stream.
9640 If VAL is a cons of coding systems, the car part is used for decoding,
9641 and the cdr part is used for encoding.
9642 If VAL is a function symbol, the function must return a coding system
9643 or a cons of coding systems which are used as above.
9644
9645 See also the function `find-operation-coding-system'.  */);
9646   Vnetwork_coding_system_alist = Qnil;
9647
9648   DEFVAR_LISP ("locale-coding-system", &Vlocale_coding_system,
9649                doc: /* Coding system to use with system messages.
9650 Also used for decoding keyboard input on X Window system.  */);
9651   Vlocale_coding_system = Qnil;
9652
9653   /* The eol mnemonics are reset in startup.el system-dependently.  */
9654   DEFVAR_LISP ("eol-mnemonic-unix", &eol_mnemonic_unix,
9655                doc: /*
9656 *String displayed in mode line for UNIX-like (LF) end-of-line format.  */);
9657   eol_mnemonic_unix = build_string (":");
9658
9659   DEFVAR_LISP ("eol-mnemonic-dos", &eol_mnemonic_dos,
9660                doc: /*
9661 *String displayed in mode line for DOS-like (CRLF) end-of-line format.  */);
9662   eol_mnemonic_dos = build_string ("\\");
9663
9664   DEFVAR_LISP ("eol-mnemonic-mac", &eol_mnemonic_mac,
9665                doc: /*
9666 *String displayed in mode line for MAC-like (CR) end-of-line format.  */);
9667   eol_mnemonic_mac = build_string ("/");
9668
9669   DEFVAR_LISP ("eol-mnemonic-undecided", &eol_mnemonic_undecided,
9670                doc: /*
9671 *String displayed in mode line when end-of-line format is not yet determined.  */);
9672   eol_mnemonic_undecided = build_string (":");
9673
9674   DEFVAR_LISP ("enable-character-translation", &Venable_character_translation,
9675                doc: /*
9676 *Non-nil enables character translation while encoding and decoding.  */);
9677   Venable_character_translation = Qt;
9678
9679   DEFVAR_LISP ("standard-translation-table-for-decode",
9680                &Vstandard_translation_table_for_decode,
9681                doc: /* Table for translating characters while decoding.  */);
9682   Vstandard_translation_table_for_decode = Qnil;
9683
9684   DEFVAR_LISP ("standard-translation-table-for-encode",
9685                &Vstandard_translation_table_for_encode,
9686                doc: /* Table for translating characters while encoding.  */);
9687   Vstandard_translation_table_for_encode = Qnil;
9688
9689   DEFVAR_LISP ("charset-revision-table", &Vcharset_revision_table,
9690                doc: /* Alist of charsets vs revision numbers.
9691 While encoding, if a charset (car part of an element) is found,
9692 designate it with the escape sequence identifying revision (cdr part
9693 of the element).  */);
9694   Vcharset_revision_table = Qnil;
9695
9696   DEFVAR_LISP ("default-process-coding-system",
9697                &Vdefault_process_coding_system,
9698                doc: /* Cons of coding systems used for process I/O by default.
9699 The car part is used for decoding a process output,
9700 the cdr part is used for encoding a text to be sent to a process.  */);
9701   Vdefault_process_coding_system = Qnil;
9702
9703   DEFVAR_LISP ("latin-extra-code-table", &Vlatin_extra_code_table,
9704                doc: /*
9705 Table of extra Latin codes in the range 128..159 (inclusive).
9706 This is a vector of length 256.
9707 If Nth element is non-nil, the existence of code N in a file
9708 \(or output of subprocess) doesn't prevent it to be detected as
9709 a coding system of ISO 2022 variant which has a flag
9710 `accept-latin-extra-code' t (e.g. iso-latin-1) on reading a file
9711 or reading output of a subprocess.
9712 Only 128th through 159th elements has a meaning.  */);
9713   Vlatin_extra_code_table = Fmake_vector (make_number (256), Qnil);
9714
9715   DEFVAR_LISP ("select-safe-coding-system-function",
9716                &Vselect_safe_coding_system_function,
9717                doc: /*
9718 Function to call to select safe coding system for encoding a text.
9719
9720 If set, this function is called to force a user to select a proper
9721 coding system which can encode the text in the case that a default
9722 coding system used in each operation can't encode the text.
9723
9724 The default value is `select-safe-coding-system' (which see).  */);
9725   Vselect_safe_coding_system_function = Qnil;
9726
9727   DEFVAR_BOOL ("coding-system-require-warning",
9728                &coding_system_require_warning,
9729                doc: /* Internal use only.
9730 If non-nil, on writing a file, `select-safe-coding-system-function' is
9731 called even if `coding-system-for-write' is non-nil.  The command
9732 `universal-coding-system-argument' binds this variable to t temporarily.  */);
9733   coding_system_require_warning = 0;
9734
9735
9736   DEFVAR_BOOL ("inhibit-iso-escape-detection",
9737                &inhibit_iso_escape_detection,
9738                doc: /*
9739 If non-nil, Emacs ignores ISO2022's escape sequence on code detection.
9740
9741 By default, on reading a file, Emacs tries to detect how the text is
9742 encoded.  This code detection is sensitive to escape sequences.  If
9743 the sequence is valid as ISO2022, the code is determined as one of
9744 the ISO2022 encodings, and the file is decoded by the corresponding
9745 coding system (e.g. `iso-2022-7bit').
9746
9747 However, there may be a case that you want to read escape sequences in
9748 a file as is.  In such a case, you can set this variable to non-nil.
9749 Then, as the code detection ignores any escape sequences, no file is
9750 detected as encoded in some ISO2022 encoding.  The result is that all
9751 escape sequences become visible in a buffer.
9752
9753 The default value is nil, and it is strongly recommended not to change
9754 it.  That is because many Emacs Lisp source files that contain
9755 non-ASCII characters are encoded by the coding system `iso-2022-7bit'
9756 in Emacs's distribution, and they won't be decoded correctly on
9757 reading if you suppress escape sequence detection.
9758
9759 The other way to read escape sequences in a file without decoding is
9760 to explicitly specify some coding system that doesn't use ISO2022's
9761 escape sequence (e.g `latin-1') on reading by \\[universal-coding-system-argument].  */);
9762   inhibit_iso_escape_detection = 0;
9763
9764   DEFVAR_LISP ("translation-table-for-input", &Vtranslation_table_for_input,
9765                doc: /* Char table for translating self-inserting characters.
9766 This is applied to the result of input methods, not their input.  See also
9767 `keyboard-translate-table'.  */);
9768     Vtranslation_table_for_input = Qnil;
9769
9770   {
9771     Lisp_Object args[coding_arg_max];
9772     Lisp_Object plist[16];
9773     int i;
9774
9775     for (i = 0; i < coding_arg_max; i++)
9776       args[i] = Qnil;
9777
9778     plist[0] = intern (":name");
9779     plist[1] = args[coding_arg_name] = Qno_conversion;
9780     plist[2] = intern (":mnemonic");
9781     plist[3] = args[coding_arg_mnemonic] = make_number ('=');
9782     plist[4] = intern (":coding-type");
9783     plist[5] = args[coding_arg_coding_type] = Qraw_text;
9784     plist[6] = intern (":ascii-compatible-p");
9785     plist[7] = args[coding_arg_ascii_compatible_p] = Qt;
9786     plist[8] = intern (":default-char");
9787     plist[9] = args[coding_arg_default_char] = make_number (0);
9788     plist[10] = intern (":for-unibyte");
9789     plist[11] = args[coding_arg_for_unibyte] = Qt;
9790     plist[12] = intern (":docstring");
9791     plist[13] = build_string ("Do no conversion.\n\
9792 \n\
9793 When you visit a file with this coding, the file is read into a\n\
9794 unibyte buffer as is, thus each byte of a file is treated as a\n\
9795 character.");
9796     plist[14] = intern (":eol-type");
9797     plist[15] = args[coding_arg_eol_type] = Qunix;
9798     args[coding_arg_plist] = Flist (16, plist);
9799     Fdefine_coding_system_internal (coding_arg_max, args);
9800
9801     plist[1] = args[coding_arg_name] = Qundecided;
9802     plist[3] = args[coding_arg_mnemonic] = make_number ('-');
9803     plist[5] = args[coding_arg_coding_type] = Qundecided;
9804     /* This is already set.
9805        plist[7] = args[coding_arg_ascii_compatible_p] = Qt; */
9806     plist[8] = intern (":charset-list");
9807     plist[9] = args[coding_arg_charset_list] = Fcons (Qascii, Qnil);
9808     plist[11] = args[coding_arg_for_unibyte] = Qnil;
9809     plist[13] = build_string ("No conversion on encoding, automatic conversion on decoding.");
9810     plist[15] = args[coding_arg_eol_type] = Qnil;
9811     args[coding_arg_plist] = Flist (16, plist);
9812     Fdefine_coding_system_internal (coding_arg_max, args);
9813   }
9814
9815   setup_coding_system (Qno_conversion, &keyboard_coding);
9816   setup_coding_system (Qundecided, &terminal_coding);
9817   setup_coding_system (Qno_conversion, &safe_terminal_coding);
9818
9819   {
9820     int i;
9821
9822     for (i = 0; i < coding_category_max; i++)
9823       Fset (AREF (Vcoding_category_table, i), Qno_conversion);
9824   }
9825 #if defined (MSDOS) || defined (WINDOWSNT)
9826   system_eol_type = Qdos;
9827 #else
9828   system_eol_type = Qunix;
9829 #endif
9830   staticpro (&system_eol_type);
9831 }
9832
9833 char *
9834 emacs_strerror (error_number)
9835      int error_number;
9836 {
9837   char *str;
9838
9839   synchronize_system_messages_locale ();
9840   str = strerror (error_number);
9841
9842   if (! NILP (Vlocale_coding_system))
9843     {
9844       Lisp_Object dec = code_convert_string_norecord (build_string (str),
9845                                                       Vlocale_coding_system,
9846                                                       0);
9847       str = (char *) SDATA (dec);
9848     }
9849
9850   return str;
9851 }
9852
9853 #endif /* emacs */
9854
9855 /* arch-tag: 3a3a2b01-5ff6-4071-9afe-f5b808d9229d
9856    (do not change this comment) */