src/coding.c

   1 /* Coding system handler (conversion, detection, etc).
   2    Copyright (C) 1995, 1997, 1998 Electrotechnical Laboratory, JAPAN.
   3      Licensed to the Free Software Foundation.
   4    Copyright (C) 2001, 2002 Free Software Foundation, Inc.
   5    Copyright (C) 2003
   6      National Institute of Advanced Industrial Science and Technology (AIST)
   7      Registration Number H13PRO009
   8
   9 This file is part of GNU Emacs.
  10
  11 GNU Emacs is free software; you can redistribute it and/or modify
  12 it under the terms of the GNU General Public License as published by
  13 the Free Software Foundation; either version 2, or (at your option)
  14 any later version.
  15
  16 GNU Emacs is distributed in the hope that it will be useful,
  17 but WITHOUT ANY WARRANTY; without even the implied warranty of
  18 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  19 GNU General Public License for more details.
  20
  21 You should have received a copy of the GNU General Public License
  22 along with GNU Emacs; see the file COPYING.  If not, write to
  23 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
  24 Boston, MA 02111-1307, USA.  */
  25
  26 /*** TABLE OF CONTENTS ***
  27
  28   0. General comments
  29   1. Preamble
  30   2. Emacs' internal format (emacs-utf-8) handlers
  31   3. UTF-8 handlers
  32   4. UTF-16 handlers
  33   5. Charset-base coding systems handlers
  34   6. emacs-mule (old Emacs' internal format) handlers
  35   7. ISO2022 handlers
  36   8. Shift-JIS and BIG5 handlers
  37   9. CCL handlers
  38   10. C library functions
  39   11. Emacs Lisp library functions
  40   12. Postamble
  41
  42 */
  43
  44 /*** 0. General comments ***
  45
  46
  47 CODING SYSTEM
  48
  49   A coding system is an object for an encoding mechanism that contains
  50   information about how to convert byte sequences to character
  51   sequences and vice versa.  When we say "decode", it means converting
  52   a byte sequence of a specific coding system into a character
  53   sequence that is represented by Emacs' internal coding system
  54   `emacs-utf-8', and when we say "encode", it means converting a
  55   character sequence of emacs-utf-8 to a byte sequence of a specific
  56   coding system.
  57
  58   In Emacs Lisp, a coding system is represented by a Lisp symbol.  In
  59   C level, a coding system is represented by a vector of attributes
  60   stored in the hash table Vcharset_hash_table.  The conversion from
  61   coding system symbol to attributes vector is done by looking up
  62   Vcharset_hash_table by the symbol.
  63
  64   Coding systems are classified into the following types depending on
  65   the encoding mechanism.  Here's a brief description of the types.
  66
  67   o UTF-8
  68
  69   o UTF-16
  70
  71   o Charset-base coding system
  72
  73   A coding system defined by one or more (coded) character sets.
  74   Decoding and encoding are done by a code converter defined for each
  75   character set.
  76
  77   o Old Emacs internal format (emacs-mule)
  78
  79   The coding system adopted by old versions of Emacs (20 and 21).
  80
  81   o ISO2022-base coding system
  82
  83   The most famous coding system for multiple character sets.  X's
  84   Compound Text, various EUCs (Extended Unix Code), and coding systems
  85   used in the Internet communication such as ISO-2022-JP are all
  86   variants of ISO2022.
  87
  88   o SJIS (or Shift-JIS or MS-Kanji-Code)
  89
  90   A coding system to encode character sets: ASCII, JISX0201, and
  91   JISX0208.  Widely used for PC's in Japan.  Details are described in
  92   section 8.
  93
  94   o BIG5
  95
  96   A coding system to encode character sets: ASCII and Big5.  Widely
  97   used for Chinese (mainly in Taiwan and Hong Kong).  Details are
  98   described in section 8.  In this file, when we write "big5" (all
  99   lowercase), we mean the coding system, and when we write "Big5"
 100   (capitalized), we mean the character set.
 101
 102   o CCL
 103
 104   If a user wants to decode/encode text encoded in a coding system
 105   not listed above, he can supply a decoder and an encoder for it in
 106   CCL (Code Conversion Language) programs.  Emacs executes the CCL
 107   program while decoding/encoding.
 108
 109   o Raw-text
 110
 111   A coding system for text containing raw eight-bit data.  Emacs
 112   treats each byte of source text as a character (except for
 113   end-of-line conversion).
 114
 115   o No-conversion
 116
 117   Like raw text, but don't do end-of-line conversion.
 118
 119
 120 END-OF-LINE FORMAT
 121
 122   How text end-of-line is encoded depends on operating system.  For
 123   instance, Unix's format is just one byte of LF (line-feed) code,
 124   whereas DOS's format is two-byte sequence of `carriage-return' and
 125   `line-feed' codes.  MacOS's format is usually one byte of
 126   `carriage-return'.
 127
 128   Since text character encoding and end-of-line encoding are
 129   independent, any coding system described above can take any format
 130   of end-of-line (except for no-conversion).
 131
 132 STRUCT CODING_SYSTEM
 133
 134   Before using a coding system for code conversion (i.e. decoding and
 135   encoding), we setup a structure of type `struct coding_system'.
 136   This structure keeps various information about a specific code
 137   conversion (e.g. the location of source and destination data).
 138
 139 */
 140
 141 /* COMMON MACROS */
 142
 143
 144 /*** GENERAL NOTES on `detect_coding_XXX ()' functions ***
 145
 146   These functions check if a byte sequence specified as a source in
 147   CODING conforms to the format of XXX, and update the members of
 148   DETECT_INFO.
 149
 150   Return 1 if the byte sequence conforms to XXX, otherwise return 0.
 151
 152   Below is the template of these functions.  */
 153
 154 #if 0
 155 static int
 156 detect_coding_XXX (coding, detect_info)
 157      struct coding_system *coding;
 158      struct coding_detection_info *detect_info;
 159 {
 160   unsigned char *src = coding->source;
 161   unsigned char *src_end = coding->source + coding->src_bytes;
 162   int multibytep = coding->src_multibyte;
 163   int consumed_chars = 0;
 164   int found = 0;
 165   ...;
 166
 167   while (1)
 168     {
 169       /* Get one byte from the source.  If the souce is exausted, jump
 170          to no_more_source:.  */
 171       ONE_MORE_BYTE (c);
 172
 173       if (! __C_conforms_to_XXX___ (c))
 174         break;
 175       if (! __C_strongly_suggests_XXX__ (c))
 176         found = CATEGORY_MASK_XXX;
 177     }
 178   /* The byte sequence is invalid for XXX.  */
 179   detect_info->rejected |= CATEGORY_MASK_XXX;
 180   return 0;
 181
 182  no_more_source:
 183   /* The source exausted successfully.  */
 184   detect_info->found |= found;
 185   return 1;
 186 }
 187 #endif
 188
 189 /*** GENERAL NOTES on `decode_coding_XXX ()' functions ***
 190
 191   These functions decode a byte sequence specified as a source by
 192   CODING.  The resulting multibyte text goes to a place pointed to by
 193   CODING->charbuf, the length of which should not exceed
 194   CODING->charbuf_size;
 195
 196   These functions set the information of original and decoded texts in
 197   CODING->consumed, CODING->consumed_char, and CODING->charbuf_used.
 198   They also set CODING->result to one of CODING_RESULT_XXX indicating
 199   how the decoding is finished.
 200
 201   Below is the template of these functions.  */
 202
 203 #if 0
 204 static void
 205 decode_coding_XXXX (coding)
 206      struct coding_system *coding;
 207 {
 208   unsigned char *src = coding->source + coding->consumed;
 209   unsigned char *src_end = coding->source + coding->src_bytes;
 210   /* SRC_BASE remembers the start position in source in each loop.
 211      The loop will be exited when there's not enough source code, or
 212      when there's no room in CHARBUF for a decoded character.  */
 213   unsigned char *src_base;
 214   /* A buffer to produce decoded characters.  */
 215   int *charbuf = coding->charbuf + coding->charbuf_used;
 216   int *charbuf_end = coding->charbuf + coding->charbuf_size;
 217   int multibytep = coding->src_multibyte;
 218
 219   while (1)
 220     {
 221       src_base = src;
 222       if (charbuf < charbuf_end)
 223         /* No more room to produce a decoded character.  */
 224         break;
 225       ONE_MORE_BYTE (c);
 226       /* Decode it. */
 227     }
 228
 229  no_more_source:
 230   if (src_base < src_end
 231       && coding->mode & CODING_MODE_LAST_BLOCK)
 232     /* If the source ends by partial bytes to construct a character,
 233        treat them as eight-bit raw data.  */
 234     while (src_base < src_end && charbuf < charbuf_end)
 235       *charbuf++ = *src_base++;
 236   /* Remember how many bytes and characters we consumed.  If the
 237      source is multibyte, the bytes and chars are not identical.  */
 238   coding->consumed = coding->consumed_char = src_base - coding->source;
 239   /* Remember how many characters we produced.  */
 240   coding->charbuf_used = charbuf - coding->charbuf;
 241 }
 242 #endif
 243
 244 /*** GENERAL NOTES on `encode_coding_XXX ()' functions ***
 245
 246   These functions encode SRC_BYTES length text at SOURCE of Emacs'
 247   internal multibyte format by CODING.  The resulting byte sequence
 248   goes to a place pointed to by DESTINATION, the length of which
 249   should not exceed DST_BYTES.
 250
 251   These functions set the information of original and encoded texts in
 252   the members produced, produced_char, consumed, and consumed_char of
 253   the structure *CODING.  They also set the member result to one of
 254   CODING_RESULT_XXX indicating how the encoding finished.
 255
 256   DST_BYTES zero means that source area and destination area are
 257   overlapped, which means that we can produce a encoded text until it
 258   reaches at the head of not-yet-encoded source text.
 259
 260   Below is a template of these functions.  */
 261 #if 0
 262 static void
 263 encode_coding_XXX (coding)
 264      struct coding_system *coding;
 265 {
 266   int multibytep = coding->dst_multibyte;
 267   int *charbuf = coding->charbuf;
 268   int *charbuf_end = charbuf->charbuf + coding->charbuf_used;
 269   unsigned char *dst = coding->destination + coding->produced;
 270   unsigned char *dst_end = coding->destination + coding->dst_bytes;
 271   unsigned char *adjusted_dst_end = dst_end - _MAX_BYTES_PRODUCED_IN_LOOP_;
 272   int produced_chars = 0;
 273
 274   for (; charbuf < charbuf_end && dst < adjusted_dst_end; charbuf++)
 275     {
 276       int c = *charbuf;
 277       /* Encode C into DST, and increment DST.  */
 278     }
 279  label_no_more_destination:
 280   /* How many chars and bytes we produced.  */
 281   coding->produced_char += produced_chars;
 282   coding->produced = dst - coding->destination;
 283 }
 284 #endif
 285
 286 \f
 287 /*** 1. Preamble ***/
 288
 289 #include <config.h>
 290 #include <stdio.h>
 291
 292 #include "lisp.h"
 293 #include "buffer.h"
 294 #include "character.h"
 295 #include "charset.h"
 296 #include "ccl.h"
 297 #include "composite.h"
 298 #include "coding.h"
 299 #include "window.h"
 300
 301 Lisp_Object Vcoding_system_hash_table;
 302
 303 Lisp_Object Qcoding_system, Qcoding_aliases, Qeol_type;
 304 Lisp_Object Qunix, Qdos;
 305 extern Lisp_Object Qmac;        /* frame.c */
 306 Lisp_Object Qbuffer_file_coding_system;
 307 Lisp_Object Qpost_read_conversion, Qpre_write_conversion;
 308 Lisp_Object Qdefault_char;
 309 Lisp_Object Qno_conversion, Qundecided;
 310 Lisp_Object Qcharset, Qiso_2022, Qutf_8, Qutf_16, Qshift_jis, Qbig5;
 311 Lisp_Object Qbig, Qlittle;
 312 Lisp_Object Qcoding_system_history;
 313 Lisp_Object Qvalid_codes;
 314 Lisp_Object QCcategory, QCmnemonic, QCdefalut_char;
 315 Lisp_Object QCdecode_translation_table, QCencode_translation_table;
 316 Lisp_Object QCpost_read_conversion, QCpre_write_conversion;
 317
 318 extern Lisp_Object Qinsert_file_contents, Qwrite_region;
 319 Lisp_Object Qcall_process, Qcall_process_region, Qprocess_argument;
 320 Lisp_Object Qstart_process, Qopen_network_stream;
 321 Lisp_Object Qtarget_idx;
 322
 323 Lisp_Object Qinsufficient_source, Qinconsistent_eol, Qinvalid_source;
 324 Lisp_Object Qinterrupted, Qinsufficient_memory;
 325
 326 int coding_system_require_warning;
 327
 328 Lisp_Object Vselect_safe_coding_system_function;
 329
 330 /* Mnemonic string for each format of end-of-line.  */
 331 Lisp_Object eol_mnemonic_unix, eol_mnemonic_dos, eol_mnemonic_mac;
 332 /* Mnemonic string to indicate format of end-of-line is not yet
 333    decided.  */
 334 Lisp_Object eol_mnemonic_undecided;
 335
 336 #ifdef emacs
 337
 338 Lisp_Object Vcoding_system_list, Vcoding_system_alist;
 339
 340 Lisp_Object Qcoding_system_p, Qcoding_system_error;
 341
 342 /* Coding system emacs-mule and raw-text are for converting only
 343    end-of-line format.  */
 344 Lisp_Object Qemacs_mule, Qraw_text;
 345 Lisp_Object Qutf_8_emacs;
 346
 347 /* Coding-systems are handed between Emacs Lisp programs and C internal
 348    routines by the following three variables.  */
 349 /* Coding-system for reading files and receiving data from process.  */
 350 Lisp_Object Vcoding_system_for_read;
 351 /* Coding-system for writing files and sending data to process.  */
 352 Lisp_Object Vcoding_system_for_write;
 353 /* Coding-system actually used in the latest I/O.  */
 354 Lisp_Object Vlast_coding_system_used;
 355 /* Set to non-nil when an error is detected while code conversion.  */
 356 Lisp_Object Vlast_code_conversion_error;
 357 /* A vector of length 256 which contains information about special
 358    Latin codes (especially for dealing with Microsoft codes).  */
 359 Lisp_Object Vlatin_extra_code_table;
 360
 361 /* Flag to inhibit code conversion of end-of-line format.  */
 362 int inhibit_eol_conversion;
 363
 364 /* Flag to inhibit ISO2022 escape sequence detection.  */
 365 int inhibit_iso_escape_detection;
 366
 367 /* Flag to make buffer-file-coding-system inherit from process-coding.  */
 368 int inherit_process_coding_system;
 369
 370 /* Coding system to be used to encode text for terminal display.  */
 371 struct coding_system terminal_coding;
 372
 373 /* Coding system to be used to encode text for terminal display when
 374    terminal coding system is nil.  */
 375 struct coding_system safe_terminal_coding;
 376
 377 /* Coding system of what is sent from terminal keyboard.  */
 378 struct coding_system keyboard_coding;
 379
 380 Lisp_Object Vfile_coding_system_alist;
 381 Lisp_Object Vprocess_coding_system_alist;
 382 Lisp_Object Vnetwork_coding_system_alist;
 383
 384 Lisp_Object Vlocale_coding_system;
 385
 386 #endif /* emacs */
 387
 388 /* Flag to tell if we look up translation table on character code
 389    conversion.  */
 390 Lisp_Object Venable_character_translation;
 391 /* Standard translation table to look up on decoding (reading).  */
 392 Lisp_Object Vstandard_translation_table_for_decode;
 393 /* Standard translation table to look up on encoding (writing).  */
 394 Lisp_Object Vstandard_translation_table_for_encode;
 395
 396 Lisp_Object Qtranslation_table;
 397 Lisp_Object Qtranslation_table_id;
 398 Lisp_Object Qtranslation_table_for_decode;
 399 Lisp_Object Qtranslation_table_for_encode;
 400
 401 /* Alist of charsets vs revision number.  */
 402 static Lisp_Object Vcharset_revision_table;
 403
 404 /* Default coding systems used for process I/O.  */
 405 Lisp_Object Vdefault_process_coding_system;
 406
 407 /* Char table for translating Quail and self-inserting input.  */
 408 Lisp_Object Vtranslation_table_for_input;
 409
 410 /* Two special coding systems.  */
 411 Lisp_Object Vsjis_coding_system;
 412 Lisp_Object Vbig5_coding_system;
 413
 414 /* ISO2022 section */
 415
 416 #define CODING_ISO_INITIAL(coding, reg)                 \
 417   (XINT (AREF (AREF (CODING_ID_ATTRS ((coding)->id),    \
 418                      coding_attr_iso_initial),          \
 419                reg)))
 420
 421
 422 #define CODING_ISO_REQUEST(coding, charset_id)  \
 423   ((charset_id <= (coding)->max_charset_id      \
 424     ? (coding)->safe_charsets[charset_id]       \
 425     : -1))
 426
 427
 428 #define CODING_ISO_FLAGS(coding)        \
 429   ((coding)->spec.iso_2022.flags)
 430 #define CODING_ISO_DESIGNATION(coding, reg)     \
 431   ((coding)->spec.iso_2022.current_designation[reg])
 432 #define CODING_ISO_INVOCATION(coding, plane)    \
 433   ((coding)->spec.iso_2022.current_invocation[plane])
 434 #define CODING_ISO_SINGLE_SHIFTING(coding)      \
 435   ((coding)->spec.iso_2022.single_shifting)
 436 #define CODING_ISO_BOL(coding)  \
 437   ((coding)->spec.iso_2022.bol)
 438 #define CODING_ISO_INVOKED_CHARSET(coding, plane)       \
 439   CODING_ISO_DESIGNATION ((coding), CODING_ISO_INVOCATION ((coding), (plane)))
 440
 441 /* Control characters of ISO2022.  */
 442                         /* code */      /* function */
 443 #define ISO_CODE_LF     0x0A            /* line-feed */
 444 #define ISO_CODE_CR     0x0D            /* carriage-return */
 445 #define ISO_CODE_SO     0x0E            /* shift-out */
 446 #define ISO_CODE_SI     0x0F            /* shift-in */
 447 #define ISO_CODE_SS2_7  0x19            /* single-shift-2 for 7-bit code */
 448 #define ISO_CODE_ESC    0x1B            /* escape */
 449 #define ISO_CODE_SS2    0x8E            /* single-shift-2 */
 450 #define ISO_CODE_SS3    0x8F            /* single-shift-3 */
 451 #define ISO_CODE_CSI    0x9B            /* control-sequence-introducer */
 452
 453 /* All code (1-byte) of ISO2022 is classified into one of the
 454    followings.  */
 455 enum iso_code_class_type
 456   {
 457     ISO_control_0,              /* Control codes in the range
 458                                    0x00..0x1F and 0x7F, except for the
 459                                    following 5 codes.  */
 460     ISO_shift_out,              /* ISO_CODE_SO (0x0E) */
 461     ISO_shift_in,               /* ISO_CODE_SI (0x0F) */
 462     ISO_single_shift_2_7,       /* ISO_CODE_SS2_7 (0x19) */
 463     ISO_escape,                 /* ISO_CODE_SO (0x1B) */
 464     ISO_control_1,              /* Control codes in the range
 465                                    0x80..0x9F, except for the
 466                                    following 3 codes.  */
 467     ISO_single_shift_2,         /* ISO_CODE_SS2 (0x8E) */
 468     ISO_single_shift_3,         /* ISO_CODE_SS3 (0x8F) */
 469     ISO_control_sequence_introducer, /* ISO_CODE_CSI (0x9B) */
 470     ISO_0x20_or_0x7F,           /* Codes of the values 0x20 or 0x7F.  */
 471     ISO_graphic_plane_0,        /* Graphic codes in the range 0x21..0x7E.  */
 472     ISO_0xA0_or_0xFF,           /* Codes of the values 0xA0 or 0xFF.  */
 473     ISO_graphic_plane_1         /* Graphic codes in the range 0xA1..0xFE.  */
 474   };
 475
 476 /** The macros CODING_ISO_FLAG_XXX defines a flag bit of the
 477     `iso-flags' attribute of an iso2022 coding system.  */
 478
 479 /* If set, produce long-form designation sequence (e.g. ESC $ ( A)
 480    instead of the correct short-form sequence (e.g. ESC $ A).  */
 481 #define CODING_ISO_FLAG_LONG_FORM       0x0001
 482
 483 /* If set, reset graphic planes and registers at end-of-line to the
 484    initial state.  */
 485 #define CODING_ISO_FLAG_RESET_AT_EOL    0x0002
 486
 487 /* If set, reset graphic planes and registers before any control
 488    characters to the initial state.  */
 489 #define CODING_ISO_FLAG_RESET_AT_CNTL   0x0004
 490
 491 /* If set, encode by 7-bit environment.  */
 492 #define CODING_ISO_FLAG_SEVEN_BITS      0x0008
 493
 494 /* If set, use locking-shift function.  */
 495 #define CODING_ISO_FLAG_LOCKING_SHIFT   0x0010
 496
 497 /* If set, use single-shift function.  Overwrite
 498    CODING_ISO_FLAG_LOCKING_SHIFT.  */
 499 #define CODING_ISO_FLAG_SINGLE_SHIFT    0x0020
 500
 501 /* If set, use designation escape sequence.  */
 502 #define CODING_ISO_FLAG_DESIGNATION     0x0040
 503
 504 /* If set, produce revision number sequence.  */
 505 #define CODING_ISO_FLAG_REVISION        0x0080
 506
 507 /* If set, produce ISO6429's direction specifying sequence.  */
 508 #define CODING_ISO_FLAG_DIRECTION       0x0100
 509
 510 /* If set, assume designation states are reset at beginning of line on
 511    output.  */
 512 #define CODING_ISO_FLAG_INIT_AT_BOL     0x0200
 513
 514 /* If set, designation sequence should be placed at beginning of line
 515    on output.  */
 516 #define CODING_ISO_FLAG_DESIGNATE_AT_BOL 0x0400
 517
 518 /* If set, do not encode unsafe charactes on output.  */
 519 #define CODING_ISO_FLAG_SAFE            0x0800
 520
 521 /* If set, extra latin codes (128..159) are accepted as a valid code
 522    on input.  */
 523 #define CODING_ISO_FLAG_LATIN_EXTRA     0x1000
 524
 525 #define CODING_ISO_FLAG_COMPOSITION     0x2000
 526
 527 #define CODING_ISO_FLAG_EUC_TW_SHIFT    0x4000
 528
 529 #define CODING_ISO_FLAG_USE_ROMAN       0x8000
 530
 531 #define CODING_ISO_FLAG_USE_OLDJIS      0x10000
 532
 533 #define CODING_ISO_FLAG_FULL_SUPPORT    0x100000
 534
 535 /* A character to be produced on output if encoding of the original
 536    character is prohibited by CODING_ISO_FLAG_SAFE.  */
 537 #define CODING_INHIBIT_CHARACTER_SUBSTITUTION  '?'
 538
 539
 540 /* UTF-16 section */
 541 #define CODING_UTF_16_BOM(coding)       \
 542   ((coding)->spec.utf_16.bom)
 543
 544 #define CODING_UTF_16_ENDIAN(coding)    \
 545   ((coding)->spec.utf_16.endian)
 546
 547 #define CODING_UTF_16_SURROGATE(coding) \
 548   ((coding)->spec.utf_16.surrogate)
 549
 550
 551 /* CCL section */
 552 #define CODING_CCL_DECODER(coding)      \
 553   AREF (CODING_ID_ATTRS ((coding)->id), coding_attr_ccl_decoder)
 554 #define CODING_CCL_ENCODER(coding)      \
 555   AREF (CODING_ID_ATTRS ((coding)->id), coding_attr_ccl_encoder)
 556 #define CODING_CCL_VALIDS(coding)                                          \
 557   (SDATA (AREF (CODING_ID_ATTRS ((coding)->id), coding_attr_ccl_valids)))
 558
 559 /* Index for each coding category in `coding_categories' */
 560
 561 enum coding_category
 562   {
 563     coding_category_iso_7,
 564     coding_category_iso_7_tight,
 565     coding_category_iso_8_1,
 566     coding_category_iso_8_2,
 567     coding_category_iso_7_else,
 568     coding_category_iso_8_else,
 569     coding_category_utf_8,
 570     coding_category_utf_16_auto,
 571     coding_category_utf_16_be,
 572     coding_category_utf_16_le,
 573     coding_category_utf_16_be_nosig,
 574     coding_category_utf_16_le_nosig,
 575     coding_category_charset,
 576     coding_category_sjis,
 577     coding_category_big5,
 578     coding_category_ccl,
 579     coding_category_emacs_mule,
 580     /* All above are targets of code detection.  */
 581     coding_category_raw_text,
 582     coding_category_undecided,
 583     coding_category_max
 584   };
 585
 586 /* Definitions of flag bits used in detect_coding_XXXX.  */
 587 #define CATEGORY_MASK_ISO_7             (1 << coding_category_iso_7)
 588 #define CATEGORY_MASK_ISO_7_TIGHT       (1 << coding_category_iso_7_tight)
 589 #define CATEGORY_MASK_ISO_8_1           (1 << coding_category_iso_8_1)
 590 #define CATEGORY_MASK_ISO_8_2           (1 << coding_category_iso_8_2)
 591 #define CATEGORY_MASK_ISO_7_ELSE        (1 << coding_category_iso_7_else)
 592 #define CATEGORY_MASK_ISO_8_ELSE        (1 << coding_category_iso_8_else)
 593 #define CATEGORY_MASK_UTF_8             (1 << coding_category_utf_8)
 594 #define CATEGORY_MASK_UTF_16_AUTO       (1 << coding_category_utf_16_auto)
 595 #define CATEGORY_MASK_UTF_16_BE         (1 << coding_category_utf_16_be)
 596 #define CATEGORY_MASK_UTF_16_LE         (1 << coding_category_utf_16_le)
 597 #define CATEGORY_MASK_UTF_16_BE_NOSIG   (1 << coding_category_utf_16_be_nosig)
 598 #define CATEGORY_MASK_UTF_16_LE_NOSIG   (1 << coding_category_utf_16_le_nosig)
 599 #define CATEGORY_MASK_CHARSET           (1 << coding_category_charset)
 600 #define CATEGORY_MASK_SJIS              (1 << coding_category_sjis)
 601 #define CATEGORY_MASK_BIG5              (1 << coding_category_big5)
 602 #define CATEGORY_MASK_CCL               (1 << coding_category_ccl)
 603 #define CATEGORY_MASK_EMACS_MULE        (1 << coding_category_emacs_mule)
 604 #define CATEGORY_MASK_RAW_TEXT          (1 << coding_category_raw_text)
 605
 606 /* This value is returned if detect_coding_mask () find nothing other
 607    than ASCII characters.  */
 608 #define CATEGORY_MASK_ANY               \
 609   (CATEGORY_MASK_ISO_7                  \
 610    | CATEGORY_MASK_ISO_7_TIGHT          \
 611    | CATEGORY_MASK_ISO_8_1              \
 612    | CATEGORY_MASK_ISO_8_2              \
 613    | CATEGORY_MASK_ISO_7_ELSE           \
 614    | CATEGORY_MASK_ISO_8_ELSE           \
 615    | CATEGORY_MASK_UTF_8                \
 616    | CATEGORY_MASK_UTF_16_BE            \
 617    | CATEGORY_MASK_UTF_16_LE            \
 618    | CATEGORY_MASK_UTF_16_BE_NOSIG      \
 619    | CATEGORY_MASK_UTF_16_LE_NOSIG      \
 620    | CATEGORY_MASK_CHARSET              \
 621    | CATEGORY_MASK_SJIS                 \
 622    | CATEGORY_MASK_BIG5                 \
 623    | CATEGORY_MASK_CCL                  \
 624    | CATEGORY_MASK_EMACS_MULE)
 625
 626
 627 #define CATEGORY_MASK_ISO_7BIT \
 628   (CATEGORY_MASK_ISO_7 | CATEGORY_MASK_ISO_7_TIGHT)
 629
 630 #define CATEGORY_MASK_ISO_8BIT \
 631   (CATEGORY_MASK_ISO_8_1 | CATEGORY_MASK_ISO_8_2)
 632
 633 #define CATEGORY_MASK_ISO_ELSE \
 634   (CATEGORY_MASK_ISO_7_ELSE | CATEGORY_MASK_ISO_8_ELSE)
 635
 636 #define CATEGORY_MASK_ISO_ESCAPE        \
 637   (CATEGORY_MASK_ISO_7                  \
 638    | CATEGORY_MASK_ISO_7_TIGHT          \
 639    | CATEGORY_MASK_ISO_7_ELSE           \
 640    | CATEGORY_MASK_ISO_8_ELSE)
 641
 642 #define CATEGORY_MASK_ISO       \
 643   (  CATEGORY_MASK_ISO_7BIT     \
 644      | CATEGORY_MASK_ISO_8BIT   \
 645      | CATEGORY_MASK_ISO_ELSE)
 646
 647 #define CATEGORY_MASK_UTF_16            \
 648   (CATEGORY_MASK_UTF_16_BE              \
 649    | CATEGORY_MASK_UTF_16_LE            \
 650    | CATEGORY_MASK_UTF_16_BE_NOSIG      \
 651    | CATEGORY_MASK_UTF_16_LE_NOSIG)
 652
 653
 654 /* List of symbols `coding-category-xxx' ordered by priority.  This
 655    variable is exposed to Emacs Lisp.  */
 656 static Lisp_Object Vcoding_category_list;
 657
 658 /* Table of coding categories (Lisp symbols).  This variable is for
 659    internal use oly.  */
 660 static Lisp_Object Vcoding_category_table;
 661
 662 /* Table of coding-categories ordered by priority.  */
 663 static enum coding_category coding_priorities[coding_category_max];
 664
 665 /* Nth element is a coding context for the coding system bound to the
 666    Nth coding category.  */
 667 static struct coding_system coding_categories[coding_category_max];
 668
 669 /*** Commonly used macros and functions ***/
 670
 671 #ifndef min
 672 #define min(a, b) ((a) < (b) ? (a) : (b))
 673 #endif
 674 #ifndef max
 675 #define max(a, b) ((a) > (b) ? (a) : (b))
 676 #endif
 677
 678 #define CODING_GET_INFO(coding, attrs, charset_list)    \
 679   do {                                                  \
 680     (attrs) = CODING_ID_ATTRS ((coding)->id);           \
 681     (charset_list) = CODING_ATTR_CHARSET_LIST (attrs);  \
 682   } while (0)
 683
 684
 685 /* Safely get one byte from the source text pointed by SRC which ends
 686    at SRC_END, and set C to that byte.  If there are not enough bytes
 687    in the source, it jumps to `no_more_source'.  If multibytep is
 688    nonzero, and a multibyte character is found at SRC, set C to the
 689    negative value of the character code.  The caller should declare
 690    and set these variables appropriately in advance:
 691         src, src_end, multibytep */
 692
 693 #define ONE_MORE_BYTE(c)                                \
 694   do {                                                  \
 695     if (src == src_end)                                 \
 696       {                                                 \
 697         if (src_base < src)                             \
 698           record_conversion_result                      \
 699             (coding, CODING_RESULT_INSUFFICIENT_SRC);   \
 700         goto no_more_source;                            \
 701       }                                                 \
 702     c = *src++;                                         \
 703     if (multibytep && (c & 0x80))                       \
 704       {                                                 \
 705         if ((c & 0xFE) == 0xC0)                         \
 706           c = ((c & 1) << 6) | *src++;                  \
 707         else                                            \
 708           {                                             \
 709             c = - string_char (--src, &src, NULL);      \
 710             record_conversion_result                    \
 711               (coding, CODING_RESULT_INVALID_SRC);      \
 712           }                                             \
 713       }                                                 \
 714     consumed_chars++;                                   \
 715   } while (0)
 716
 717
 718 #define ONE_MORE_BYTE_NO_CHECK(c)                       \
 719   do {                                                  \
 720     c = *src++;                                         \
 721     if (multibytep && (c & 0x80))                       \
 722       {                                                 \
 723         if ((c & 0xFE) == 0xC0)                         \
 724           c = ((c & 1) << 6) | *src++;                  \
 725         else                                            \
 726           {                                             \
 727             c = - string_char (--src, &src, NULL);      \
 728             record_conversion_result                    \
 729               (coding, CODING_RESULT_INVALID_SRC);      \
 730           }                                             \
 731       }                                                 \
 732     consumed_chars++;                                   \
 733   } while (0)
 734
 735
 736 /* Store a byte C in the place pointed by DST and increment DST to the
 737    next free point, and increment PRODUCED_CHARS.  The caller should
 738    assure that C is 0..127, and declare and set the variable `dst'
 739    appropriately in advance.
 740 */
 741
 742
 743 #define EMIT_ONE_ASCII_BYTE(c)  \
 744   do {                          \
 745     produced_chars++;           \
 746     *dst++ = (c);               \
 747   } while (0)
 748
 749
 750 /* Like EMIT_ONE_ASCII_BYTE byt store two bytes; C1 and C2.  */
 751
 752 #define EMIT_TWO_ASCII_BYTES(c1, c2)    \
 753   do {                                  \
 754     produced_chars += 2;                \
 755     *dst++ = (c1), *dst++ = (c2);       \
 756   } while (0)
 757
 758
 759 /* Store a byte C in the place pointed by DST and increment DST to the
 760    next free point, and increment PRODUCED_CHARS.  If MULTIBYTEP is
 761    nonzero, store in an appropriate multibyte from.  The caller should
 762    declare and set the variables `dst' and `multibytep' appropriately
 763    in advance.  */
 764
 765 #define EMIT_ONE_BYTE(c)                \
 766   do {                                  \
 767     produced_chars++;                   \
 768     if (multibytep)                     \
 769       {                                 \
 770         int ch = (c);                   \
 771         if (ch >= 0x80)                 \
 772           ch = BYTE8_TO_CHAR (ch);      \
 773         CHAR_STRING_ADVANCE (ch, dst);  \
 774       }                                 \
 775     else                                \
 776       *dst++ = (c);                     \
 777   } while (0)
 778
 779
 780 /* Like EMIT_ONE_BYTE, but emit two bytes; C1 and C2.  */
 781
 782 #define EMIT_TWO_BYTES(c1, c2)          \
 783   do {                                  \
 784     produced_chars += 2;                \
 785     if (multibytep)                     \
 786       {                                 \
 787         int ch;                         \
 788                                         \
 789         ch = (c1);                      \
 790         if (ch >= 0x80)                 \
 791           ch = BYTE8_TO_CHAR (ch);      \
 792         CHAR_STRING_ADVANCE (ch, dst);  \
 793         ch = (c2);                      \
 794         if (ch >= 0x80)                 \
 795           ch = BYTE8_TO_CHAR (ch);      \
 796         CHAR_STRING_ADVANCE (ch, dst);  \
 797       }                                 \
 798     else                                \
 799       {                                 \
 800         *dst++ = (c1);                  \
 801         *dst++ = (c2);                  \
 802       }                                 \
 803   } while (0)
 804
 805
 806 #define EMIT_THREE_BYTES(c1, c2, c3)    \
 807   do {                                  \
 808     EMIT_ONE_BYTE (c1);                 \
 809     EMIT_TWO_BYTES (c2, c3);            \
 810   } while (0)
 811
 812
 813 #define EMIT_FOUR_BYTES(c1, c2, c3, c4)         \
 814   do {                                          \
 815     EMIT_TWO_BYTES (c1, c2);                    \
 816     EMIT_TWO_BYTES (c3, c4);                    \
 817   } while (0)
 818
 819
 820 /* Prototypes for static functions.  */
 821 static void record_conversion_result P_ ((struct coding_system *coding,
 822                                           enum coding_result_code result));
 823 static int detect_coding_utf_8 P_ ((struct coding_system *,
 824                                     struct coding_detection_info *info));
 825 static void decode_coding_utf_8 P_ ((struct coding_system *));
 826 static int encode_coding_utf_8 P_ ((struct coding_system *));
 827
 828 static int detect_coding_utf_16 P_ ((struct coding_system *,
 829                                      struct coding_detection_info *info));
 830 static void decode_coding_utf_16 P_ ((struct coding_system *));
 831 static int encode_coding_utf_16 P_ ((struct coding_system *));
 832
 833 static int detect_coding_iso_2022 P_ ((struct coding_system *,
 834                                        struct coding_detection_info *info));
 835 static void decode_coding_iso_2022 P_ ((struct coding_system *));
 836 static int encode_coding_iso_2022 P_ ((struct coding_system *));
 837
 838 static int detect_coding_emacs_mule P_ ((struct coding_system *,
 839                                          struct coding_detection_info *info));
 840 static void decode_coding_emacs_mule P_ ((struct coding_system *));
 841 static int encode_coding_emacs_mule P_ ((struct coding_system *));
 842
 843 static int detect_coding_sjis P_ ((struct coding_system *,
 844                                    struct coding_detection_info *info));
 845 static void decode_coding_sjis P_ ((struct coding_system *));
 846 static int encode_coding_sjis P_ ((struct coding_system *));
 847
 848 static int detect_coding_big5 P_ ((struct coding_system *,
 849                                    struct coding_detection_info *info));
 850 static void decode_coding_big5 P_ ((struct coding_system *));
 851 static int encode_coding_big5 P_ ((struct coding_system *));
 852
 853 static int detect_coding_ccl P_ ((struct coding_system *,
 854                                   struct coding_detection_info *info));
 855 static void decode_coding_ccl P_ ((struct coding_system *));
 856 static int encode_coding_ccl P_ ((struct coding_system *));
 857
 858 static void decode_coding_raw_text P_ ((struct coding_system *));
 859 static int encode_coding_raw_text P_ ((struct coding_system *));
 860
 861 static void coding_set_source P_ ((struct coding_system *));
 862 static void coding_set_destination P_ ((struct coding_system *));
 863 static void coding_alloc_by_realloc P_ ((struct coding_system *, EMACS_INT));
 864 static void coding_alloc_by_making_gap P_ ((struct coding_system *,
 865                                             EMACS_INT));
 866 static unsigned char *alloc_destination P_ ((struct coding_system *,
 867                                              EMACS_INT, unsigned char *));
 868 static void setup_iso_safe_charsets P_ ((Lisp_Object));
 869 static unsigned char *encode_designation_at_bol P_ ((struct coding_system *,
 870                                                      int *, int *,
 871                                                      unsigned char *));
 872 static int detect_eol P_ ((const unsigned char *,
 873                            EMACS_INT, enum coding_category));
 874 static Lisp_Object adjust_coding_eol_type P_ ((struct coding_system *, int));
 875 static void decode_eol P_ ((struct coding_system *));
 876 static Lisp_Object get_translation_table P_ ((Lisp_Object, int, int *));
 877 static Lisp_Object get_translation P_ ((Lisp_Object, int *, int *,
 878                                         int, int *, int *));
 879 static int produce_chars P_ ((struct coding_system *, Lisp_Object, int));
 880 static INLINE void produce_composition P_ ((struct coding_system *, int *,
 881                                             EMACS_INT));
 882 static INLINE void produce_charset P_ ((struct coding_system *, int *,
 883                                         EMACS_INT));
 884 static void produce_annotation P_ ((struct coding_system *, EMACS_INT));
 885 static int decode_coding P_ ((struct coding_system *));
 886 static INLINE int *handle_composition_annotation P_ ((EMACS_INT, EMACS_INT,
 887                                                       struct coding_system *,
 888                                                       int *, EMACS_INT *));
 889 static INLINE int *handle_charset_annotation P_ ((EMACS_INT, EMACS_INT,
 890                                                   struct coding_system *,
 891                                                   int *, EMACS_INT *));
 892 static void consume_chars P_ ((struct coding_system *, Lisp_Object, int));
 893 static int encode_coding P_ ((struct coding_system *));
 894 static Lisp_Object make_conversion_work_buffer P_ ((int));
 895 static Lisp_Object code_conversion_restore P_ ((Lisp_Object));
 896 static INLINE int char_encodable_p P_ ((int, Lisp_Object));
 897 static Lisp_Object make_subsidiaries P_ ((Lisp_Object));
 898
 899 static void
 900 record_conversion_result (struct coding_system *coding,
 901                           enum coding_result_code result)
 902 {
 903   coding->result = result;
 904   switch (result)
 905     {
 906     case CODING_RESULT_INSUFFICIENT_SRC:
 907       Vlast_code_conversion_error = Qinsufficient_source;
 908       break;
 909     case CODING_RESULT_INCONSISTENT_EOL:
 910       Vlast_code_conversion_error = Qinconsistent_eol;
 911       break;
 912     case CODING_RESULT_INVALID_SRC:
 913       Vlast_code_conversion_error = Qinvalid_source;
 914       break;
 915     case CODING_RESULT_INTERRUPT:
 916       Vlast_code_conversion_error = Qinterrupted;
 917       break;
 918     case CODING_RESULT_INSUFFICIENT_MEM:
 919       Vlast_code_conversion_error = Qinsufficient_memory;
 920       break;
 921     }
 922 }
 923
 924 #define CODING_DECODE_CHAR(coding, src, src_base, src_end, charset, code, c) \
 925   do {                                                                       \
 926     charset_map_loaded = 0;                                                  \
 927     c = DECODE_CHAR (charset, code);                                         \
 928     if (charset_map_loaded)                                                  \
 929       {                                                                      \
 930         const unsigned char *orig = coding->source;                          \
 931         EMACS_INT offset;                                                    \
 932                                                                              \
 933         coding_set_source (coding);                                          \
 934         offset = coding->source - orig;                                      \
 935         src += offset;                                                       \
 936         src_base += offset;                                                  \
 937         src_end += offset;                                                   \
 938       }                                                                      \
 939   } while (0)
 940
 941
 942 #define ASSURE_DESTINATION(bytes)                               \
 943   do {                                                          \
 944     if (dst + (bytes) >= dst_end)                               \
 945       {                                                         \
 946         int more_bytes = charbuf_end - charbuf + (bytes);       \
 947                                                                 \
 948         dst = alloc_destination (coding, more_bytes, dst);      \
 949         dst_end = coding->destination + coding->dst_bytes;      \
 950       }                                                         \
 951   } while (0)
 952
 953
 954
 955 static void
 956 coding_set_source (coding)
 957      struct coding_system *coding;
 958 {
 959   if (BUFFERP (coding->src_object))
 960     {
 961       struct buffer *buf = XBUFFER (coding->src_object);
 962
 963       if (coding->src_pos < 0)
 964         coding->source = BUF_GAP_END_ADDR (buf) + coding->src_pos_byte;
 965       else
 966         coding->source = BUF_BYTE_ADDRESS (buf, coding->src_pos_byte);
 967     }
 968   else if (STRINGP (coding->src_object))
 969     {
 970       coding->source = SDATA (coding->src_object) + coding->src_pos_byte;
 971     }
 972   else
 973     /* Otherwise, the source is C string and is never relocated
 974        automatically.  Thus we don't have to update anything.  */
 975     ;
 976 }
 977
 978 static void
 979 coding_set_destination (coding)
 980      struct coding_system *coding;
 981 {
 982   if (BUFFERP (coding->dst_object))
 983     {
 984       if (coding->src_pos < 0)
 985         {
 986           coding->destination = BEG_ADDR + coding->dst_pos_byte - 1;
 987           coding->dst_bytes = (GAP_END_ADDR
 988                                - (coding->src_bytes - coding->consumed)
 989                                - coding->destination);
 990         }
 991       else
 992         {
 993           /* We are sure that coding->dst_pos_byte is before the gap
 994              of the buffer. */
 995           coding->destination = (BUF_BEG_ADDR (XBUFFER (coding->dst_object))
 996                                  + coding->dst_pos_byte - 1);
 997           coding->dst_bytes = (BUF_GAP_END_ADDR (XBUFFER (coding->dst_object))
 998                                - coding->destination);
 999         }
1000     }
1001   else
1002     /* Otherwise, the destination is C string and is never relocated
1003        automatically.  Thus we don't have to update anything.  */
1004     ;
1005 }
1006
1007
1008 static void
1009 coding_alloc_by_realloc (coding, bytes)
1010      struct coding_system *coding;
1011      EMACS_INT bytes;
1012 {
1013   coding->destination = (unsigned char *) xrealloc (coding->destination,
1014                                                     coding->dst_bytes + bytes);
1015   coding->dst_bytes += bytes;
1016 }
1017
1018 static void
1019 coding_alloc_by_making_gap (coding, bytes)
1020      struct coding_system *coding;
1021      EMACS_INT bytes;
1022 {
1023   if (BUFFERP (coding->dst_object)
1024       && EQ (coding->src_object, coding->dst_object))
1025     {
1026       EMACS_INT add = coding->src_bytes - coding->consumed;
1027
1028       GAP_SIZE -= add; ZV += add; Z += add; ZV_BYTE += add; Z_BYTE += add;
1029       make_gap (bytes);
1030       GAP_SIZE += add; ZV -= add; Z -= add; ZV_BYTE -= add; Z_BYTE -= add;
1031     }
1032   else
1033     {
1034       Lisp_Object this_buffer;
1035
1036       this_buffer = Fcurrent_buffer ();
1037       set_buffer_internal (XBUFFER (coding->dst_object));
1038       make_gap (bytes);
1039       set_buffer_internal (XBUFFER (this_buffer));
1040     }
1041 }
1042
1043
1044 static unsigned char *
1045 alloc_destination (coding, nbytes, dst)
1046      struct coding_system *coding;
1047      EMACS_INT nbytes;
1048      unsigned char *dst;
1049 {
1050   EMACS_INT offset = dst - coding->destination;
1051
1052   if (BUFFERP (coding->dst_object))
1053     coding_alloc_by_making_gap (coding, nbytes);
1054   else
1055     coding_alloc_by_realloc (coding, nbytes);
1056   record_conversion_result (coding, CODING_RESULT_SUCCESS);
1057   coding_set_destination (coding);
1058   dst = coding->destination + offset;
1059   return dst;
1060 }
1061
1062 /** Macros for annotations.  */
1063
1064 /* Maximum length of annotation data (sum of annotations for
1065    composition and charset).  */
1066 #define MAX_ANNOTATION_LENGTH (4 + (MAX_COMPOSITION_COMPONENTS * 2) - 1 + 4)
1067
1068 /* An annotation data is stored in the array coding->charbuf in this
1069    format:
1070      [ -LENGTH ANNOTATION_MASK NCHARS ... ]
1071    LENGTH is the number of elements in the annotation.
1072    ANNOTATION_MASK is one of CODING_ANNOTATE_XXX_MASK.
1073    NCHARS is the number of characters in the text annotated.
1074
1075    The format of the following elements depend on ANNOTATION_MASK.
1076
1077    In the case of CODING_ANNOTATE_COMPOSITION_MASK, these elements
1078    follows:
1079      ... METHOD [ COMPOSITION-COMPONENTS ... ]
1080    METHOD is one of enum composition_method.
1081    Optionnal COMPOSITION-COMPONENTS are characters and composition
1082    rules.
1083
1084    In the case of CODING_ANNOTATE_CHARSET_MASK, one element CHARSET-ID
1085    follows.  */
1086
1087 #define ADD_ANNOTATION_DATA(buf, len, mask, nchars)     \
1088   do {                                                  \
1089     *(buf)++ = -(len);                                  \
1090     *(buf)++ = (mask);                                  \
1091     *(buf)++ = (nchars);                                \
1092     coding->annotated = 1;                              \
1093   } while (0);
1094
1095 #define ADD_COMPOSITION_DATA(buf, nchars, method)                           \
1096   do {                                                                      \
1097     ADD_ANNOTATION_DATA (buf, 4, CODING_ANNOTATE_COMPOSITION_MASK, nchars); \
1098     *buf++ = method;                                                        \
1099   } while (0)
1100
1101
1102 #define ADD_CHARSET_DATA(buf, nchars, id)                               \
1103   do {                                                                  \
1104     ADD_ANNOTATION_DATA (buf, 4, CODING_ANNOTATE_CHARSET_MASK, nchars); \
1105     *buf++ = id;                                                        \
1106   } while (0)
1107
1108 \f
1109 /*** 2. Emacs' internal format (emacs-utf-8) ***/
1110
1111
1112
1113 \f
1114 /*** 3. UTF-8 ***/
1115
1116 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
1117    Check if a text is encoded in UTF-8.  If it is, return 1, else
1118    return 0.  */
1119
1120 #define UTF_8_1_OCTET_P(c)         ((c) < 0x80)
1121 #define UTF_8_EXTRA_OCTET_P(c)     (((c) & 0xC0) == 0x80)
1122 #define UTF_8_2_OCTET_LEADING_P(c) (((c) & 0xE0) == 0xC0)
1123 #define UTF_8_3_OCTET_LEADING_P(c) (((c) & 0xF0) == 0xE0)
1124 #define UTF_8_4_OCTET_LEADING_P(c) (((c) & 0xF8) == 0xF0)
1125 #define UTF_8_5_OCTET_LEADING_P(c) (((c) & 0xFC) == 0xF8)
1126
1127 static int
1128 detect_coding_utf_8 (coding, detect_info)
1129      struct coding_system *coding;
1130      struct coding_detection_info *detect_info;
1131 {
1132   const unsigned char *src = coding->source, *src_base;
1133   const unsigned char *src_end = coding->source + coding->src_bytes;
1134   int multibytep = coding->src_multibyte;
1135   int consumed_chars = 0;
1136   int found = 0;
1137
1138   detect_info->checked |= CATEGORY_MASK_UTF_8;
1139   /* A coding system of this category is always ASCII compatible.  */
1140   src += coding->head_ascii;
1141
1142   while (1)
1143     {
1144       int c, c1, c2, c3, c4;
1145
1146       src_base = src;
1147       ONE_MORE_BYTE (c);
1148       if (c < 0 || UTF_8_1_OCTET_P (c))
1149         continue;
1150       ONE_MORE_BYTE (c1);
1151       if (c1 < 0 || ! UTF_8_EXTRA_OCTET_P (c1))
1152         break;
1153       if (UTF_8_2_OCTET_LEADING_P (c))
1154         {
1155           found = CATEGORY_MASK_UTF_8;
1156           continue;
1157         }
1158       ONE_MORE_BYTE (c2);
1159       if (c2 < 0 || ! UTF_8_EXTRA_OCTET_P (c2))
1160         break;
1161       if (UTF_8_3_OCTET_LEADING_P (c))
1162         {
1163           found = CATEGORY_MASK_UTF_8;
1164           continue;
1165         }
1166       ONE_MORE_BYTE (c3);
1167       if (c3 < 0 || ! UTF_8_EXTRA_OCTET_P (c3))
1168         break;
1169       if (UTF_8_4_OCTET_LEADING_P (c))
1170         {
1171           found = CATEGORY_MASK_UTF_8;
1172           continue;
1173         }
1174       ONE_MORE_BYTE (c4);
1175       if (c4 < 0 || ! UTF_8_EXTRA_OCTET_P (c4))
1176         break;
1177       if (UTF_8_5_OCTET_LEADING_P (c))
1178         {
1179           found = CATEGORY_MASK_UTF_8;
1180           continue;
1181         }
1182       break;
1183     }
1184   detect_info->rejected |= CATEGORY_MASK_UTF_8;
1185   return 0;
1186
1187  no_more_source:
1188   if (src_base < src && coding->mode & CODING_MODE_LAST_BLOCK)
1189     {
1190       detect_info->rejected |= CATEGORY_MASK_UTF_8;
1191       return 0;
1192     }
1193   detect_info->found |= found;
1194   return 1;
1195 }
1196
1197
1198 static void
1199 decode_coding_utf_8 (coding)
1200      struct coding_system *coding;
1201 {
1202   const unsigned char *src = coding->source + coding->consumed;
1203   const unsigned char *src_end = coding->source + coding->src_bytes;
1204   const unsigned char *src_base;
1205   int *charbuf = coding->charbuf + coding->charbuf_used;
1206   int *charbuf_end = coding->charbuf + coding->charbuf_size;
1207   int consumed_chars = 0, consumed_chars_base;
1208   int multibytep = coding->src_multibyte;
1209   Lisp_Object attr, charset_list;
1210
1211   CODING_GET_INFO (coding, attr, charset_list);
1212
1213   while (1)
1214     {
1215       int c, c1, c2, c3, c4, c5;
1216
1217       src_base = src;
1218       consumed_chars_base = consumed_chars;
1219
1220       if (charbuf >= charbuf_end)
1221         break;
1222
1223       ONE_MORE_BYTE (c1);
1224       if (c1 < 0)
1225         {
1226           c = - c1;
1227         }
1228       else if (UTF_8_1_OCTET_P(c1))
1229         {
1230           c = c1;
1231         }
1232       else
1233         {
1234           ONE_MORE_BYTE (c2);
1235           if (c2 < 0 || ! UTF_8_EXTRA_OCTET_P (c2))
1236             goto invalid_code;
1237           if (UTF_8_2_OCTET_LEADING_P (c1))
1238             {
1239               c = ((c1 & 0x1F) << 6) | (c2 & 0x3F);
1240               /* Reject overlong sequences here and below.  Encoders
1241                  producing them are incorrect, they can be misleading,
1242                  and they mess up read/write invariance.  */
1243               if (c < 128)
1244                 goto invalid_code;
1245             }
1246           else
1247             {
1248               ONE_MORE_BYTE (c3);
1249               if (c3 < 0 || ! UTF_8_EXTRA_OCTET_P (c3))
1250                 goto invalid_code;
1251               if (UTF_8_3_OCTET_LEADING_P (c1))
1252                 {
1253                   c = (((c1 & 0xF) << 12)
1254                        | ((c2 & 0x3F) << 6) | (c3 & 0x3F));
1255                   if (c < 0x800
1256                       || (c >= 0xd800 && c < 0xe000)) /* surrogates (invalid) */
1257                     goto invalid_code;
1258                 }
1259               else
1260                 {
1261                   ONE_MORE_BYTE (c4);
1262                   if (c4 < 0 || ! UTF_8_EXTRA_OCTET_P (c4))
1263                     goto invalid_code;
1264                   if (UTF_8_4_OCTET_LEADING_P (c1))
1265                     {
1266                     c = (((c1 & 0x7) << 18) | ((c2 & 0x3F) << 12)
1267                          | ((c3 & 0x3F) << 6) | (c4 & 0x3F));
1268                     if (c < 0x10000)
1269                       goto invalid_code;
1270                     }
1271                   else
1272                     {
1273                       ONE_MORE_BYTE (c5);
1274                       if (c5 < 0 || ! UTF_8_EXTRA_OCTET_P (c5))
1275                         goto invalid_code;
1276                       if (UTF_8_5_OCTET_LEADING_P (c1))
1277                         {
1278                           c = (((c1 & 0x3) << 24) | ((c2 & 0x3F) << 18)
1279                                | ((c3 & 0x3F) << 12) | ((c4 & 0x3F) << 6)
1280                                | (c5 & 0x3F));
1281                           if ((c > MAX_CHAR) || (c < 0x200000))
1282                             goto invalid_code;
1283                         }
1284                       else
1285                         goto invalid_code;
1286                     }
1287                 }
1288             }
1289         }
1290
1291       *charbuf++ = c;
1292       continue;
1293
1294     invalid_code:
1295       src = src_base;
1296       consumed_chars = consumed_chars_base;
1297       ONE_MORE_BYTE (c);
1298       *charbuf++ = ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
1299       coding->errors++;
1300     }
1301
1302  no_more_source:
1303   coding->consumed_char += consumed_chars_base;
1304   coding->consumed = src_base - coding->source;
1305   coding->charbuf_used = charbuf - coding->charbuf;
1306 }
1307
1308
1309 static int
1310 encode_coding_utf_8 (coding)
1311      struct coding_system *coding;
1312 {
1313   int multibytep = coding->dst_multibyte;
1314   int *charbuf = coding->charbuf;
1315   int *charbuf_end = charbuf + coding->charbuf_used;
1316   unsigned char *dst = coding->destination + coding->produced;
1317   unsigned char *dst_end = coding->destination + coding->dst_bytes;
1318   int produced_chars = 0;
1319   int c;
1320
1321   if (multibytep)
1322     {
1323       int safe_room = MAX_MULTIBYTE_LENGTH * 2;
1324
1325       while (charbuf < charbuf_end)
1326         {
1327           unsigned char str[MAX_MULTIBYTE_LENGTH], *p, *pend = str;
1328
1329           ASSURE_DESTINATION (safe_room);
1330           c = *charbuf++;
1331           if (CHAR_BYTE8_P (c))
1332             {
1333               c = CHAR_TO_BYTE8 (c);
1334               EMIT_ONE_BYTE (c);
1335             }
1336           else
1337             {
1338               CHAR_STRING_ADVANCE (c, pend);
1339               for (p = str; p < pend; p++)
1340                 EMIT_ONE_BYTE (*p);
1341             }
1342         }
1343     }
1344   else
1345     {
1346       int safe_room = MAX_MULTIBYTE_LENGTH;
1347
1348       while (charbuf < charbuf_end)
1349         {
1350           ASSURE_DESTINATION (safe_room);
1351           c = *charbuf++;
1352           dst += CHAR_STRING (c, dst);
1353           produced_chars++;
1354         }
1355     }
1356   record_conversion_result (coding, CODING_RESULT_SUCCESS);
1357   coding->produced_char += produced_chars;
1358   coding->produced = dst - coding->destination;
1359   return 0;
1360 }
1361
1362
1363 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
1364    Check if a text is encoded in one of UTF-16 based coding systems.
1365    If it is, return 1, else return 0.  */
1366
1367 #define UTF_16_HIGH_SURROGATE_P(val) \
1368   (((val) & 0xFC00) == 0xD800)
1369
1370 #define UTF_16_LOW_SURROGATE_P(val) \
1371   (((val) & 0xFC00) == 0xDC00)
1372
1373 #define UTF_16_INVALID_P(val)   \
1374   (((val) == 0xFFFE)            \
1375    || ((val) == 0xFFFF)         \
1376    || UTF_16_LOW_SURROGATE_P (val))
1377
1378
1379 static int
1380 detect_coding_utf_16 (coding, detect_info)
1381      struct coding_system *coding;
1382      struct coding_detection_info *detect_info;
1383 {
1384   const unsigned char *src = coding->source, *src_base = src;
1385   const unsigned char *src_end = coding->source + coding->src_bytes;
1386   int multibytep = coding->src_multibyte;
1387   int consumed_chars = 0;
1388   int c1, c2;
1389
1390   detect_info->checked |= CATEGORY_MASK_UTF_16;
1391   if (coding->mode & CODING_MODE_LAST_BLOCK
1392       && (coding->src_chars & 1))
1393     {
1394       detect_info->rejected |= CATEGORY_MASK_UTF_16;
1395       return 0;
1396     }
1397
1398   ONE_MORE_BYTE (c1);
1399   ONE_MORE_BYTE (c2);
1400   if ((c1 == 0xFF) && (c2 == 0xFE))
1401     {
1402       detect_info->found |= (CATEGORY_MASK_UTF_16_LE
1403                              | CATEGORY_MASK_UTF_16_AUTO);
1404       detect_info->rejected |= (CATEGORY_MASK_UTF_16_BE
1405                                 | CATEGORY_MASK_UTF_16_BE_NOSIG
1406                                 | CATEGORY_MASK_UTF_16_LE_NOSIG);
1407     }
1408   else if ((c1 == 0xFE) && (c2 == 0xFF))
1409     {
1410       detect_info->found |= (CATEGORY_MASK_UTF_16_BE
1411                              | CATEGORY_MASK_UTF_16_AUTO);
1412       detect_info->rejected |= (CATEGORY_MASK_UTF_16_LE
1413                                 | CATEGORY_MASK_UTF_16_BE_NOSIG
1414                                 | CATEGORY_MASK_UTF_16_LE_NOSIG);
1415     }
1416   else if (c1 >= 0 && c2 >= 0)
1417     {
1418       unsigned char b1[256], b2[256];
1419       int b1_variants = 1, b2_variants = 1;
1420       int n;
1421
1422       bzero (b1, 256), bzero (b2, 256);
1423       b1[c1]++, b2[c2]++;
1424       for (n = 0; n < 256 && src < src_end; n++)
1425         {
1426           src_base = src;
1427           ONE_MORE_BYTE (c1);
1428           ONE_MORE_BYTE (c2);
1429           if (c1 < 0 || c2 < 0)
1430             break;
1431           if (! b1[c1++]) b1_variants++;
1432           if (! b2[c2++]) b2_variants++;
1433         }
1434       if (b1_variants < b2_variants)
1435         detect_info->found |= CATEGORY_MASK_UTF_16_BE_NOSIG;
1436       else
1437         detect_info->found |= CATEGORY_MASK_UTF_16_LE_NOSIG;
1438       detect_info->rejected
1439         |= (CATEGORY_MASK_UTF_16_BE | CATEGORY_MASK_UTF_16_LE);
1440     }
1441  no_more_source:
1442   return 1;
1443 }
1444
1445 static void
1446 decode_coding_utf_16 (coding)
1447      struct coding_system *coding;
1448 {
1449   const unsigned char *src = coding->source + coding->consumed;
1450   const unsigned char *src_end = coding->source + coding->src_bytes;
1451   const unsigned char *src_base;
1452   int *charbuf = coding->charbuf + coding->charbuf_used;
1453   int *charbuf_end = coding->charbuf + coding->charbuf_size;
1454   int consumed_chars = 0, consumed_chars_base;
1455   int multibytep = coding->src_multibyte;
1456   enum utf_16_bom_type bom = CODING_UTF_16_BOM (coding);
1457   enum utf_16_endian_type endian = CODING_UTF_16_ENDIAN (coding);
1458   int surrogate = CODING_UTF_16_SURROGATE (coding);
1459   Lisp_Object attr, charset_list;
1460
1461   CODING_GET_INFO (coding, attr, charset_list);
1462
1463   if (bom == utf_16_with_bom)
1464     {
1465       int c, c1, c2;
1466
1467       src_base = src;
1468       ONE_MORE_BYTE (c1);
1469       ONE_MORE_BYTE (c2);
1470       c = (c1 << 8) | c2;
1471
1472       if (endian == utf_16_big_endian
1473           ? c != 0xFEFF : c != 0xFFFE)
1474         {
1475           /* The first two bytes are not BOM.  Treat them as bytes
1476              for a normal character.  */
1477           src = src_base;
1478           coding->errors++;
1479         }
1480       CODING_UTF_16_BOM (coding) = utf_16_without_bom;
1481     }
1482   else if (bom == utf_16_detect_bom)
1483     {
1484       /* We have already tried to detect BOM and failed in
1485          detect_coding.  */
1486       CODING_UTF_16_BOM (coding) = utf_16_without_bom;
1487     }
1488
1489   while (1)
1490     {
1491       int c, c1, c2;
1492
1493       src_base = src;
1494       consumed_chars_base = consumed_chars;
1495
1496       if (charbuf + 2 >= charbuf_end)
1497         break;
1498
1499       ONE_MORE_BYTE (c1);
1500       if (c1 < 0)
1501         {
1502           *charbuf++ = -c1;
1503           continue;
1504         }
1505       ONE_MORE_BYTE (c2);
1506       if (c2 < 0)
1507         {
1508           *charbuf++ = ASCII_BYTE_P (c1) ? c1 : BYTE8_TO_CHAR (c1);
1509           *charbuf++ = -c2;
1510           continue;
1511         }
1512       c = (endian == utf_16_big_endian
1513            ? ((c1 << 8) | c2) : ((c2 << 8) | c1));
1514       if (surrogate)
1515         {
1516           if (! UTF_16_LOW_SURROGATE_P (c))
1517             {
1518               if (endian == utf_16_big_endian)
1519                 c1 = surrogate >> 8, c2 = surrogate & 0xFF;
1520               else
1521                 c1 = surrogate & 0xFF, c2 = surrogate >> 8;
1522               *charbuf++ = c1;
1523               *charbuf++ = c2;
1524               coding->errors++;
1525               if (UTF_16_HIGH_SURROGATE_P (c))
1526                 CODING_UTF_16_SURROGATE (coding) = surrogate = c;
1527               else
1528                 *charbuf++ = c;
1529             }
1530           else
1531             {
1532               c = ((surrogate - 0xD800) << 10) | (c - 0xDC00);
1533               CODING_UTF_16_SURROGATE (coding) = surrogate = 0;
1534               *charbuf++ = 0x10000 + c;
1535             }
1536         }
1537       else
1538         {
1539           if (UTF_16_HIGH_SURROGATE_P (c))
1540             CODING_UTF_16_SURROGATE (coding) = surrogate = c;
1541           else
1542             *charbuf++ = c;
1543         }
1544     }
1545
1546  no_more_source:
1547   coding->consumed_char += consumed_chars_base;
1548   coding->consumed = src_base - coding->source;
1549   coding->charbuf_used = charbuf - coding->charbuf;
1550 }
1551
1552 static int
1553 encode_coding_utf_16 (coding)
1554      struct coding_system *coding;
1555 {
1556   int multibytep = coding->dst_multibyte;
1557   int *charbuf = coding->charbuf;
1558   int *charbuf_end = charbuf + coding->charbuf_used;
1559   unsigned char *dst = coding->destination + coding->produced;
1560   unsigned char *dst_end = coding->destination + coding->dst_bytes;
1561   int safe_room = 8;
1562   enum utf_16_bom_type bom = CODING_UTF_16_BOM (coding);
1563   int big_endian = CODING_UTF_16_ENDIAN (coding) == utf_16_big_endian;
1564   int produced_chars = 0;
1565   Lisp_Object attrs, charset_list;
1566   int c;
1567
1568   CODING_GET_INFO (coding, attrs, charset_list);
1569
1570   if (bom != utf_16_without_bom)
1571     {
1572       ASSURE_DESTINATION (safe_room);
1573       if (big_endian)
1574         EMIT_TWO_BYTES (0xFE, 0xFF);
1575       else
1576         EMIT_TWO_BYTES (0xFF, 0xFE);
1577       CODING_UTF_16_BOM (coding) = utf_16_without_bom;
1578     }
1579
1580   while (charbuf < charbuf_end)
1581     {
1582       ASSURE_DESTINATION (safe_room);
1583       c = *charbuf++;
1584       if (c >= MAX_UNICODE_CHAR)
1585         c = coding->default_char;
1586
1587       if (c < 0x10000)
1588         {
1589           if (big_endian)
1590             EMIT_TWO_BYTES (c >> 8, c & 0xFF);
1591           else
1592             EMIT_TWO_BYTES (c & 0xFF, c >> 8);
1593         }
1594       else
1595         {
1596           int c1, c2;
1597
1598           c -= 0x10000;
1599           c1 = (c >> 10) + 0xD800;
1600           c2 = (c & 0x3FF) + 0xDC00;
1601           if (big_endian)
1602             EMIT_FOUR_BYTES (c1 >> 8, c1 & 0xFF, c2 >> 8, c2 & 0xFF);
1603           else
1604             EMIT_FOUR_BYTES (c1 & 0xFF, c1 >> 8, c2 & 0xFF, c2 >> 8);
1605         }
1606     }
1607   record_conversion_result (coding, CODING_RESULT_SUCCESS);
1608   coding->produced = dst - coding->destination;
1609   coding->produced_char += produced_chars;
1610   return 0;
1611 }
1612
1613 \f
1614 /*** 6. Old Emacs' internal format (emacs-mule) ***/
1615
1616 /* Emacs' internal format for representation of multiple character
1617    sets is a kind of multi-byte encoding, i.e. characters are
1618    represented by variable-length sequences of one-byte codes.
1619
1620    ASCII characters and control characters (e.g. `tab', `newline') are
1621    represented by one-byte sequences which are their ASCII codes, in
1622    the range 0x00 through 0x7F.
1623
1624    8-bit characters of the range 0x80..0x9F are represented by
1625    two-byte sequences of LEADING_CODE_8_BIT_CONTROL and (their 8-bit
1626    code + 0x20).
1627
1628    8-bit characters of the range 0xA0..0xFF are represented by
1629    one-byte sequences which are their 8-bit code.
1630
1631    The other characters are represented by a sequence of `base
1632    leading-code', optional `extended leading-code', and one or two
1633    `position-code's.  The length of the sequence is determined by the
1634    base leading-code.  Leading-code takes the range 0x81 through 0x9D,
1635    whereas extended leading-code and position-code take the range 0xA0
1636    through 0xFF.  See `charset.h' for more details about leading-code
1637    and position-code.
1638
1639    --- CODE RANGE of Emacs' internal format ---
1640    character set        range
1641    -------------        -----
1642    ascii                0x00..0x7F
1643    eight-bit-control    LEADING_CODE_8_BIT_CONTROL + 0xA0..0xBF
1644    eight-bit-graphic    0xA0..0xBF
1645    ELSE                 0x81..0x9D + [0xA0..0xFF]+
1646    ---------------------------------------------
1647
1648    As this is the internal character representation, the format is
1649    usually not used externally (i.e. in a file or in a data sent to a
1650    process).  But, it is possible to have a text externally in this
1651    format (i.e. by encoding by the coding system `emacs-mule').
1652
1653    In that case, a sequence of one-byte codes has a slightly different
1654    form.
1655
1656    At first, all characters in eight-bit-control are represented by
1657    one-byte sequences which are their 8-bit code.
1658
1659    Next, character composition data are represented by the byte
1660    sequence of the form: 0x80 METHOD BYTES CHARS COMPONENT ...,
1661    where,
1662         METHOD is 0xF0 plus one of composition method (enum
1663         composition_method),
1664
1665         BYTES is 0xA0 plus a byte length of this composition data,
1666
1667         CHARS is 0x20 plus a number of characters composed by this
1668         data,
1669
1670         COMPONENTs are characters of multibye form or composition
1671         rules encoded by two-byte of ASCII codes.
1672
1673    In addition, for backward compatibility, the following formats are
1674    also recognized as composition data on decoding.
1675
1676    0x80 MSEQ ...
1677    0x80 0xFF MSEQ RULE MSEQ RULE ... MSEQ
1678
1679    Here,
1680         MSEQ is a multibyte form but in these special format:
1681           ASCII: 0xA0 ASCII_CODE+0x80,
1682           other: LEADING_CODE+0x20 FOLLOWING-BYTE ...,
1683         RULE is a one byte code of the range 0xA0..0xF0 that
1684         represents a composition rule.
1685   */
1686
1687 char emacs_mule_bytes[256];
1688
1689 int
1690 emacs_mule_char (coding, src, nbytes, nchars, id)
1691      struct coding_system *coding;
1692      const unsigned char *src;
1693      int *nbytes, *nchars, *id;
1694 {
1695   const unsigned char *src_end = coding->source + coding->src_bytes;
1696   const unsigned char *src_base = src;
1697   int multibytep = coding->src_multibyte;
1698   struct charset *charset;
1699   unsigned code;
1700   int c;
1701   int consumed_chars = 0;
1702
1703   ONE_MORE_BYTE (c);
1704   if (c < 0)
1705     {
1706       c = -c;
1707       charset = emacs_mule_charset[0];
1708     }
1709   else
1710     {
1711       switch (emacs_mule_bytes[c])
1712         {
1713         case 2:
1714           if (! (charset = emacs_mule_charset[c]))
1715             goto invalid_code;
1716           ONE_MORE_BYTE (c);
1717           if (c < 0)
1718             goto invalid_code;
1719           code = c & 0x7F;
1720           break;
1721
1722         case 3:
1723           if (c == EMACS_MULE_LEADING_CODE_PRIVATE_11
1724               || c == EMACS_MULE_LEADING_CODE_PRIVATE_12)
1725             {
1726               ONE_MORE_BYTE (c);
1727               if (c < 0 || ! (charset = emacs_mule_charset[c]))
1728                 goto invalid_code;
1729               ONE_MORE_BYTE (c);
1730               if (c < 0)
1731                 goto invalid_code;
1732               code = c & 0x7F;
1733             }
1734           else
1735             {
1736               if (! (charset = emacs_mule_charset[c]))
1737                 goto invalid_code;
1738               ONE_MORE_BYTE (c);
1739               if (c < 0)
1740                 goto invalid_code;
1741               code = (c & 0x7F) << 8;
1742               ONE_MORE_BYTE (c);
1743               if (c < 0)
1744                 goto invalid_code;
1745               code |= c & 0x7F;
1746             }
1747           break;
1748
1749         case 4:
1750           ONE_MORE_BYTE (c);
1751           if (c < 0 || ! (charset = emacs_mule_charset[c]))
1752             goto invalid_code;
1753           ONE_MORE_BYTE (c);
1754           if (c < 0)
1755             goto invalid_code;
1756           code = (c & 0x7F) << 8;
1757           ONE_MORE_BYTE (c);
1758           if (c < 0)
1759             goto invalid_code;
1760           code |= c & 0x7F;
1761           break;
1762
1763         case 1:
1764           code = c;
1765           charset = CHARSET_FROM_ID (ASCII_BYTE_P (code)
1766                                      ? charset_ascii : charset_eight_bit);
1767           break;
1768
1769         default:
1770           abort ();
1771         }
1772       c = DECODE_CHAR (charset, code);
1773       if (c < 0)
1774         goto invalid_code;
1775     }
1776   *nbytes = src - src_base;
1777   *nchars = consumed_chars;
1778   if (id)
1779     *id = charset->id;
1780   return c;
1781
1782  no_more_source:
1783   return -2;
1784
1785  invalid_code:
1786   return -1;
1787 }
1788
1789
1790 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
1791    Check if a text is encoded in `emacs-mule'.  If it is, return 1,
1792    else return 0.  */
1793
1794 static int
1795 detect_coding_emacs_mule (coding, detect_info)
1796      struct coding_system *coding;
1797      struct coding_detection_info *detect_info;
1798 {
1799   const unsigned char *src = coding->source, *src_base;
1800   const unsigned char *src_end = coding->source + coding->src_bytes;
1801   int multibytep = coding->src_multibyte;
1802   int consumed_chars = 0;
1803   int c;
1804   int found = 0;
1805
1806   detect_info->checked |= CATEGORY_MASK_EMACS_MULE;
1807   /* A coding system of this category is always ASCII compatible.  */
1808   src += coding->head_ascii;
1809
1810   while (1)
1811     {
1812       src_base = src;
1813       ONE_MORE_BYTE (c);
1814       if (c < 0)
1815         continue;
1816       if (c == 0x80)
1817         {
1818           /* Perhaps the start of composite character.  We simple skip
1819              it because analyzing it is too heavy for detecting.  But,
1820              at least, we check that the composite character
1821              constitues of more than 4 bytes.  */
1822           const unsigned char *src_base;
1823
1824         repeat:
1825           src_base = src;
1826           do
1827             {
1828               ONE_MORE_BYTE (c);
1829             }
1830           while (c >= 0xA0);
1831
1832           if (src - src_base <= 4)
1833             break;
1834           found = CATEGORY_MASK_EMACS_MULE;
1835           if (c == 0x80)
1836             goto repeat;
1837         }
1838
1839       if (c < 0x80)
1840         {
1841           if (c < 0x20
1842               && (c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO))
1843             break;
1844         }
1845       else
1846         {
1847           int more_bytes = emacs_mule_bytes[*src_base] - 1;
1848
1849           while (more_bytes > 0)
1850             {
1851               ONE_MORE_BYTE (c);
1852               if (c < 0xA0)
1853                 {
1854                   src--;        /* Unread the last byte.  */
1855                   break;
1856                 }
1857               more_bytes--;
1858             }
1859           if (more_bytes != 0)
1860             break;
1861           found = CATEGORY_MASK_EMACS_MULE;
1862         }
1863     }
1864   detect_info->rejected |= CATEGORY_MASK_EMACS_MULE;
1865   return 0;
1866
1867  no_more_source:
1868   if (src_base < src && coding->mode & CODING_MODE_LAST_BLOCK)
1869     {
1870       detect_info->rejected |= CATEGORY_MASK_EMACS_MULE;
1871       return 0;
1872     }
1873   detect_info->found |= found;
1874   return 1;
1875 }
1876
1877
1878 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".  */
1879
1880 /* Decode a character represented as a component of composition
1881    sequence of Emacs 20/21 style at SRC.  Set C to that character and
1882    update SRC to the head of next character (or an encoded composition
1883    rule).  If SRC doesn't points a composition component, set C to -1.
1884    If SRC points an invalid byte sequence, global exit by a return
1885    value 0.  */
1886
1887 #define DECODE_EMACS_MULE_COMPOSITION_CHAR(buf)                 \
1888   if (1)                                                        \
1889     {                                                           \
1890       int c;                                                    \
1891       int nbytes, nchars;                                       \
1892                                                                 \
1893       if (src == src_end)                                       \
1894         break;                                                  \
1895       c = emacs_mule_char (coding, src, &nbytes, &nchars, NULL);\
1896       if (c < 0)                                                \
1897         {                                                       \
1898           if (c == -2)                                          \
1899             break;                                              \
1900           goto invalid_code;                                    \
1901         }                                                       \
1902       *buf++ = c;                                               \
1903       src += nbytes;                                            \
1904       consumed_chars += nchars;                                 \
1905     }                                                           \
1906   else
1907
1908
1909 /* Decode a composition rule represented as a component of composition
1910    sequence of Emacs 20 style at SRC.  Store the decoded rule in *BUF,
1911    and increment BUF.  If SRC points an invalid byte sequence, set C
1912    to -1.  */
1913
1914 #define DECODE_EMACS_MULE_COMPOSITION_RULE_20(buf)      \
1915   do {                                                  \
1916     int c, gref, nref;                                  \
1917                                                         \
1918     if (src >= src_end)                                 \
1919       goto invalid_code;                                \
1920     ONE_MORE_BYTE_NO_CHECK (c);                         \
1921     c -= 0x20;                                          \
1922     if (c < 0 || c >= 81)                               \
1923       goto invalid_code;                                \
1924                                                         \
1925     gref = c / 9, nref = c % 9;                         \
1926     *buf++ = COMPOSITION_ENCODE_RULE (gref, nref);      \
1927   } while (0)
1928
1929
1930 /* Decode a composition rule represented as a component of composition
1931    sequence of Emacs 21 style at SRC.  Store the decoded rule in *BUF,
1932    and increment BUF.  If SRC points an invalid byte sequence, set C
1933    to -1.  */
1934
1935 #define DECODE_EMACS_MULE_COMPOSITION_RULE_21(buf)      \
1936   do {                                                  \
1937     int gref, nref;                                     \
1938                                                         \
1939     if (src + 1>= src_end)                              \
1940       goto invalid_code;                                \
1941     ONE_MORE_BYTE_NO_CHECK (gref);                      \
1942     gref -= 0x20;                                       \
1943     ONE_MORE_BYTE_NO_CHECK (nref);                      \
1944     nref -= 0x20;                                       \
1945     if (gref < 0 || gref >= 81                          \
1946         || nref < 0 || nref >= 81)                      \
1947       goto invalid_code;                                \
1948     *buf++ = COMPOSITION_ENCODE_RULE (gref, nref);      \
1949   } while (0)
1950
1951
1952 #define DECODE_EMACS_MULE_21_COMPOSITION(c)                             \
1953   do {                                                                  \
1954     /* Emacs 21 style format.  The first three bytes at SRC are         \
1955        (METHOD - 0xF2), (BYTES - 0xA0), (CHARS - 0xA0), where BYTES is  \
1956        the byte length of this composition information, CHARS is the    \
1957        number of characters composed by this composition.  */           \
1958     enum composition_method method = c - 0xF2;                          \
1959     int *charbuf_base = charbuf;                                        \
1960     int consumed_chars_limit;                                           \
1961     int nbytes, nchars;                                                 \
1962                                                                         \
1963     ONE_MORE_BYTE (c);                                                  \
1964     if (c < 0)                                                          \
1965       goto invalid_code;                                                \
1966     nbytes = c - 0xA0;                                                  \
1967     if (nbytes < 3)                                                     \
1968       goto invalid_code;                                                \
1969     ONE_MORE_BYTE (c);                                                  \
1970     if (c < 0)                                                          \
1971       goto invalid_code;                                                \
1972     nchars = c - 0xA0;                                                  \
1973     ADD_COMPOSITION_DATA (charbuf, nchars, method);                     \
1974     consumed_chars_limit = consumed_chars_base + nbytes;                \
1975     if (method != COMPOSITION_RELATIVE)                                 \
1976       {                                                                 \
1977         int i = 0;                                                      \
1978         while (consumed_chars < consumed_chars_limit)                   \
1979           {                                                             \
1980             if (i % 2 && method != COMPOSITION_WITH_ALTCHARS)           \
1981               DECODE_EMACS_MULE_COMPOSITION_RULE_21 (charbuf);          \
1982             else                                                        \
1983               DECODE_EMACS_MULE_COMPOSITION_CHAR (charbuf);             \
1984             i++;                                                        \
1985           }                                                             \
1986         if (consumed_chars < consumed_chars_limit)                      \
1987           goto invalid_code;                                            \
1988         charbuf_base[0] -= i;                                           \
1989       }                                                                 \
1990   } while (0)
1991
1992
1993 #define DECODE_EMACS_MULE_20_RELATIVE_COMPOSITION(c)            \
1994   do {                                                          \
1995     /* Emacs 20 style format for relative composition.  */      \
1996     /* Store multibyte form of characters to be composed.  */   \
1997     enum composition_method method = COMPOSITION_RELATIVE;      \
1998     int components[MAX_COMPOSITION_COMPONENTS * 2 - 1];         \
1999     int *buf = components;                                      \
2000     int i, j;                                                   \
2001                                                                 \
2002     src = src_base;                                             \
2003     ONE_MORE_BYTE (c);          /* skip 0x80 */                 \
2004     for (i = 0; i < MAX_COMPOSITION_COMPONENTS; i++)            \
2005       DECODE_EMACS_MULE_COMPOSITION_CHAR (buf);                 \
2006     if (i < 2)                                                  \
2007       goto invalid_code;                                        \
2008     ADD_COMPOSITION_DATA (charbuf, i, method);                  \
2009     for (j = 0; j < i; j++)                                     \
2010       *charbuf++ = components[j];                               \
2011   } while (0)
2012
2013
2014 #define DECODE_EMACS_MULE_20_RULEBASE_COMPOSITION(c)            \
2015   do {                                                          \
2016     /* Emacs 20 style format for rule-base composition.  */     \
2017     /* Store multibyte form of characters to be composed.  */   \
2018     enum composition_method method = COMPOSITION_WITH_RULE;     \
2019     int components[MAX_COMPOSITION_COMPONENTS * 2 - 1];         \
2020     int *buf = components;                                      \
2021     int i, j;                                                   \
2022                                                                 \
2023     DECODE_EMACS_MULE_COMPOSITION_CHAR (buf);                   \
2024     for (i = 0; i < MAX_COMPOSITION_COMPONENTS; i++)            \
2025       {                                                         \
2026         DECODE_EMACS_MULE_COMPOSITION_RULE_20 (buf);            \
2027         DECODE_EMACS_MULE_COMPOSITION_CHAR (buf);               \
2028       }                                                         \
2029     if (i < 1 || (buf - components) % 2 == 0)                   \
2030       goto invalid_code;                                        \
2031     if (charbuf + i + (i / 2) + 1 < charbuf_end)                \
2032       goto no_more_source;                                      \
2033     ADD_COMPOSITION_DATA (buf, i, method);                      \
2034     for (j = 0; j < i; j++)                                     \
2035       *charbuf++ = components[j];                               \
2036     for (j = 0; j < i; j += 2)                                  \
2037       *charbuf++ = components[j];                               \
2038   } while (0)
2039
2040
2041 static void
2042 decode_coding_emacs_mule (coding)
2043      struct coding_system *coding;
2044 {
2045   const unsigned char *src = coding->source + coding->consumed;
2046   const unsigned char *src_end = coding->source + coding->src_bytes;
2047   const unsigned char *src_base;
2048   int *charbuf = coding->charbuf + coding->charbuf_used;
2049   int *charbuf_end
2050     = coding->charbuf + coding->charbuf_size - MAX_ANNOTATION_LENGTH;
2051   int consumed_chars = 0, consumed_chars_base;
2052   int multibytep = coding->src_multibyte;
2053   Lisp_Object attrs, charset_list;
2054   int char_offset = coding->produced_char;
2055   int last_offset = char_offset;
2056   int last_id = charset_ascii;
2057
2058   CODING_GET_INFO (coding, attrs, charset_list);
2059
2060   while (1)
2061     {
2062       int c;
2063
2064       src_base = src;
2065       consumed_chars_base = consumed_chars;
2066
2067       if (charbuf >= charbuf_end)
2068         break;
2069
2070       ONE_MORE_BYTE (c);
2071       if (c < 0)
2072         {
2073           *charbuf++ = -c;
2074           char_offset++;
2075         }
2076       else if (c < 0x80)
2077         {
2078           *charbuf++ = c;
2079           char_offset++;
2080         }
2081       else if (c == 0x80)
2082         {
2083           ONE_MORE_BYTE (c);
2084           if (c < 0)
2085             goto invalid_code;
2086           if (c - 0xF2 >= COMPOSITION_RELATIVE
2087               && c - 0xF2 <= COMPOSITION_WITH_RULE_ALTCHARS)
2088             DECODE_EMACS_MULE_21_COMPOSITION (c);
2089           else if (c < 0xC0)
2090             DECODE_EMACS_MULE_20_RELATIVE_COMPOSITION (c);
2091           else if (c == 0xFF)
2092             DECODE_EMACS_MULE_20_RULEBASE_COMPOSITION (c);
2093           else
2094             goto invalid_code;
2095         }
2096       else if (c < 0xA0 && emacs_mule_bytes[c] > 1)
2097         {
2098           int nbytes, nchars;
2099           int id;
2100
2101           src = src_base;
2102           consumed_chars = consumed_chars_base;
2103           c = emacs_mule_char (coding, src, &nbytes, &nchars, &id);
2104           if (c < 0)
2105             {
2106               if (c == -2)
2107                 break;
2108               goto invalid_code;
2109             }
2110           if (last_id != id)
2111             {
2112               if (last_id != charset_ascii)
2113                 ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
2114               last_id = id;
2115               last_offset = char_offset;
2116             }
2117           *charbuf++ = c;
2118           src += nbytes;
2119           consumed_chars += nchars;
2120           char_offset++;
2121         }
2122       continue;
2123
2124     invalid_code:
2125       src = src_base;
2126       consumed_chars = consumed_chars_base;
2127       ONE_MORE_BYTE (c);
2128       *charbuf++ = ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
2129       char_offset++;
2130       coding->errors++;
2131     }
2132
2133  no_more_source:
2134   if (last_id != charset_ascii)
2135     ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
2136   coding->consumed_char += consumed_chars_base;
2137   coding->consumed = src_base - coding->source;
2138   coding->charbuf_used = charbuf - coding->charbuf;
2139 }
2140
2141
2142 #define EMACS_MULE_LEADING_CODES(id, codes)     \
2143   do {                                          \
2144     if (id < 0xA0)                              \
2145       codes[0] = id, codes[1] = 0;              \
2146     else if (id < 0xE0)                         \
2147       codes[0] = 0x9A, codes[1] = id;           \
2148     else if (id < 0xF0)                         \
2149       codes[0] = 0x9B, codes[1] = id;           \
2150     else if (id < 0xF5)                         \
2151       codes[0] = 0x9C, codes[1] = id;           \
2152     else                                        \
2153       codes[0] = 0x9D, codes[1] = id;           \
2154   } while (0);
2155
2156
2157 static int
2158 encode_coding_emacs_mule (coding)
2159      struct coding_system *coding;
2160 {
2161   int multibytep = coding->dst_multibyte;
2162   int *charbuf = coding->charbuf;
2163   int *charbuf_end = charbuf + coding->charbuf_used;
2164   unsigned char *dst = coding->destination + coding->produced;
2165   unsigned char *dst_end = coding->destination + coding->dst_bytes;
2166   int safe_room = 8;
2167   int produced_chars = 0;
2168   Lisp_Object attrs, charset_list;
2169   int c;
2170   int preferred_charset_id = -1;
2171
2172   CODING_GET_INFO (coding, attrs, charset_list);
2173   if (! EQ (charset_list, Vemacs_mule_charset_list))
2174     {
2175       CODING_ATTR_CHARSET_LIST (attrs)
2176         = charset_list = Vemacs_mule_charset_list;
2177     }
2178
2179   while (charbuf < charbuf_end)
2180     {
2181       ASSURE_DESTINATION (safe_room);
2182       c = *charbuf++;
2183
2184       if (c < 0)
2185         {
2186           /* Handle an annotation.  */
2187           switch (*charbuf)
2188             {
2189             case CODING_ANNOTATE_COMPOSITION_MASK:
2190               /* Not yet implemented.  */
2191               break;
2192             case CODING_ANNOTATE_CHARSET_MASK:
2193               preferred_charset_id = charbuf[3];
2194               if (preferred_charset_id >= 0
2195                   && NILP (Fmemq (make_number (preferred_charset_id),
2196                                   charset_list)))
2197                 preferred_charset_id = -1;
2198               break;
2199             default:
2200               abort ();
2201             }
2202           charbuf += -c - 1;
2203           continue;
2204         }
2205
2206       if (ASCII_CHAR_P (c))
2207         EMIT_ONE_ASCII_BYTE (c);
2208       else if (CHAR_BYTE8_P (c))
2209         {
2210           c = CHAR_TO_BYTE8 (c);
2211           EMIT_ONE_BYTE (c);
2212         }
2213       else
2214         {
2215           struct charset *charset;
2216           unsigned code;
2217           int dimension;
2218           int emacs_mule_id;
2219           unsigned char leading_codes[2];
2220
2221           if (preferred_charset_id >= 0)
2222             {
2223               charset = CHARSET_FROM_ID (preferred_charset_id);
2224               if (! CHAR_CHARSET_P (c, charset))
2225                 charset = char_charset (c, charset_list, NULL);
2226             }
2227           else
2228             charset = char_charset (c, charset_list, &code);
2229           if (! charset)
2230             {
2231               c = coding->default_char;
2232               if (ASCII_CHAR_P (c))
2233                 {
2234                   EMIT_ONE_ASCII_BYTE (c);
2235                   continue;
2236                 }
2237               charset = char_charset (c, charset_list, &code);
2238             }
2239           dimension = CHARSET_DIMENSION (charset);
2240           emacs_mule_id = CHARSET_EMACS_MULE_ID (charset);
2241           EMACS_MULE_LEADING_CODES (emacs_mule_id, leading_codes);
2242           EMIT_ONE_BYTE (leading_codes[0]);
2243           if (leading_codes[1])
2244             EMIT_ONE_BYTE (leading_codes[1]);
2245           if (dimension == 1)
2246             EMIT_ONE_BYTE (code | 0x80);
2247           else
2248             {
2249               code |= 0x8080;
2250               EMIT_ONE_BYTE (code >> 8);
2251               EMIT_ONE_BYTE (code & 0xFF);
2252             }
2253         }
2254     }
2255   record_conversion_result (coding, CODING_RESULT_SUCCESS);
2256   coding->produced_char += produced_chars;
2257   coding->produced = dst - coding->destination;
2258   return 0;
2259 }
2260
2261 \f
2262 /*** 7. ISO2022 handlers ***/
2263
2264 /* The following note describes the coding system ISO2022 briefly.
2265    Since the intention of this note is to help understand the
2266    functions in this file, some parts are NOT ACCURATE or are OVERLY
2267    SIMPLIFIED.  For thorough understanding, please refer to the
2268    original document of ISO2022.  This is equivalent to the standard
2269    ECMA-35, obtainable from <URL:http://www.ecma.ch/> (*).
2270
2271    ISO2022 provides many mechanisms to encode several character sets
2272    in 7-bit and 8-bit environments.  For 7-bit environments, all text
2273    is encoded using bytes less than 128.  This may make the encoded
2274    text a little bit longer, but the text passes more easily through
2275    several types of gateway, some of which strip off the MSB (Most
2276    Significant Bit).
2277
2278    There are two kinds of character sets: control character sets and
2279    graphic character sets.  The former contain control characters such
2280    as `newline' and `escape' to provide control functions (control
2281    functions are also provided by escape sequences).  The latter
2282    contain graphic characters such as 'A' and '-'.  Emacs recognizes
2283    two control character sets and many graphic character sets.
2284
2285    Graphic character sets are classified into one of the following
2286    four classes, according to the number of bytes (DIMENSION) and
2287    number of characters in one dimension (CHARS) of the set:
2288    - DIMENSION1_CHARS94
2289    - DIMENSION1_CHARS96
2290    - DIMENSION2_CHARS94
2291    - DIMENSION2_CHARS96
2292
2293    In addition, each character set is assigned an identification tag,
2294    unique for each set, called the "final character" (denoted as <F>
2295    hereafter).  The <F> of each character set is decided by ECMA(*)
2296    when it is registered in ISO.  The code range of <F> is 0x30..0x7F
2297    (0x30..0x3F are for private use only).
2298
2299    Note (*): ECMA = European Computer Manufacturers Association
2300
2301    Here are examples of graphic character sets [NAME(<F>)]:
2302         o DIMENSION1_CHARS94 -- ASCII('B'), right-half-of-JISX0201('I'), ...
2303         o DIMENSION1_CHARS96 -- right-half-of-ISO8859-1('A'), ...
2304         o DIMENSION2_CHARS94 -- GB2312('A'), JISX0208('B'), ...
2305         o DIMENSION2_CHARS96 -- none for the moment
2306
2307    A code area (1 byte=8 bits) is divided into 4 areas, C0, GL, C1, and GR.
2308         C0 [0x00..0x1F] -- control character plane 0
2309         GL [0x20..0x7F] -- graphic character plane 0
2310         C1 [0x80..0x9F] -- control character plane 1
2311         GR [0xA0..0xFF] -- graphic character plane 1
2312
2313    A control character set is directly designated and invoked to C0 or
2314    C1 by an escape sequence.  The most common case is that:
2315    - ISO646's  control character set is designated/invoked to C0, and
2316    - ISO6429's control character set is designated/invoked to C1,
2317    and usually these designations/invocations are omitted in encoded
2318    text.  In a 7-bit environment, only C0 can be used, and a control
2319    character for C1 is encoded by an appropriate escape sequence to
2320    fit into the environment.  All control characters for C1 are
2321    defined to have corresponding escape sequences.
2322
2323    A graphic character set is at first designated to one of four
2324    graphic registers (G0 through G3), then these graphic registers are
2325    invoked to GL or GR.  These designations and invocations can be
2326    done independently.  The most common case is that G0 is invoked to
2327    GL, G1 is invoked to GR, and ASCII is designated to G0.  Usually
2328    these invocations and designations are omitted in encoded text.
2329    In a 7-bit environment, only GL can be used.
2330
2331    When a graphic character set of CHARS94 is invoked to GL, codes
2332    0x20 and 0x7F of the GL area work as control characters SPACE and
2333    DEL respectively, and codes 0xA0 and 0xFF of the GR area should not
2334    be used.
2335
2336    There are two ways of invocation: locking-shift and single-shift.
2337    With locking-shift, the invocation lasts until the next different
2338    invocation, whereas with single-shift, the invocation affects the
2339    following character only and doesn't affect the locking-shift
2340    state.  Invocations are done by the following control characters or
2341    escape sequences:
2342
2343    ----------------------------------------------------------------------
2344    abbrev  function                  cntrl escape seq   description
2345    ----------------------------------------------------------------------
2346    SI/LS0  (shift-in)                0x0F  none         invoke G0 into GL
2347    SO/LS1  (shift-out)               0x0E  none         invoke G1 into GL
2348    LS2     (locking-shift-2)         none  ESC 'n'      invoke G2 into GL
2349    LS3     (locking-shift-3)         none  ESC 'o'      invoke G3 into GL
2350    LS1R    (locking-shift-1 right)   none  ESC '~'      invoke G1 into GR (*)
2351    LS2R    (locking-shift-2 right)   none  ESC '}'      invoke G2 into GR (*)
2352    LS3R    (locking-shift 3 right)   none  ESC '|'      invoke G3 into GR (*)
2353    SS2     (single-shift-2)          0x8E  ESC 'N'      invoke G2 for one char
2354    SS3     (single-shift-3)          0x8F  ESC 'O'      invoke G3 for one char
2355    ----------------------------------------------------------------------
2356    (*) These are not used by any known coding system.
2357
2358    Control characters for these functions are defined by macros
2359    ISO_CODE_XXX in `coding.h'.
2360
2361    Designations are done by the following escape sequences:
2362    ----------------------------------------------------------------------
2363    escape sequence      description
2364    ----------------------------------------------------------------------
2365    ESC '(' <F>          designate DIMENSION1_CHARS94<F> to G0
2366    ESC ')' <F>          designate DIMENSION1_CHARS94<F> to G1
2367    ESC '*' <F>          designate DIMENSION1_CHARS94<F> to G2
2368    ESC '+' <F>          designate DIMENSION1_CHARS94<F> to G3
2369    ESC ',' <F>          designate DIMENSION1_CHARS96<F> to G0 (*)
2370    ESC '-' <F>          designate DIMENSION1_CHARS96<F> to G1
2371    ESC '.' <F>          designate DIMENSION1_CHARS96<F> to G2
2372    ESC '/' <F>          designate DIMENSION1_CHARS96<F> to G3
2373    ESC '$' '(' <F>      designate DIMENSION2_CHARS94<F> to G0 (**)
2374    ESC '$' ')' <F>      designate DIMENSION2_CHARS94<F> to G1
2375    ESC '$' '*' <F>      designate DIMENSION2_CHARS94<F> to G2
2376    ESC '$' '+' <F>      designate DIMENSION2_CHARS94<F> to G3
2377    ESC '$' ',' <F>      designate DIMENSION2_CHARS96<F> to G0 (*)
2378    ESC '$' '-' <F>      designate DIMENSION2_CHARS96<F> to G1
2379    ESC '$' '.' <F>      designate DIMENSION2_CHARS96<F> to G2
2380    ESC '$' '/' <F>      designate DIMENSION2_CHARS96<F> to G3
2381    ----------------------------------------------------------------------
2382
2383    In this list, "DIMENSION1_CHARS94<F>" means a graphic character set
2384    of dimension 1, chars 94, and final character <F>, etc...
2385
2386    Note (*): Although these designations are not allowed in ISO2022,
2387    Emacs accepts them on decoding, and produces them on encoding
2388    CHARS96 character sets in a coding system which is characterized as
2389    7-bit environment, non-locking-shift, and non-single-shift.
2390
2391    Note (**): If <F> is '@', 'A', or 'B', the intermediate character
2392    '(' must be omitted.  We refer to this as "short-form" hereafter.
2393
2394    Now you may notice that there are a lot of ways of encoding the
2395    same multilingual text in ISO2022.  Actually, there exist many
2396    coding systems such as Compound Text (used in X11's inter client
2397    communication, ISO-2022-JP (used in Japanese Internet), ISO-2022-KR
2398    (used in Korean Internet), EUC (Extended UNIX Code, used in Asian
2399    localized platforms), and all of these are variants of ISO2022.
2400
2401    In addition to the above, Emacs handles two more kinds of escape
2402    sequences: ISO6429's direction specification and Emacs' private
2403    sequence for specifying character composition.
2404
2405    ISO6429's direction specification takes the following form:
2406         o CSI ']'      -- end of the current direction
2407         o CSI '0' ']'  -- end of the current direction
2408         o CSI '1' ']'  -- start of left-to-right text
2409         o CSI '2' ']'  -- start of right-to-left text
2410    The control character CSI (0x9B: control sequence introducer) is
2411    abbreviated to the escape sequence ESC '[' in a 7-bit environment.
2412
2413    Character composition specification takes the following form:
2414         o ESC '0' -- start relative composition
2415         o ESC '1' -- end composition
2416         o ESC '2' -- start rule-base composition (*)
2417         o ESC '3' -- start relative composition with alternate chars  (**)
2418         o ESC '4' -- start rule-base composition with alternate chars  (**)
2419   Since these are not standard escape sequences of any ISO standard,
2420   the use of them with these meanings is restricted to Emacs only.
2421
2422   (*) This form is used only in Emacs 20.7 and older versions,
2423   but newer versions can safely decode it.
2424   (**) This form is used only in Emacs 21.1 and newer versions,
2425   and older versions can't decode it.
2426
2427   Here's a list of example usages of these composition escape
2428   sequences (categorized by `enum composition_method').
2429
2430   COMPOSITION_RELATIVE:
2431         ESC 0 CHAR [ CHAR ] ESC 1
2432   COMPOSITION_WITH_RULE:
2433         ESC 2 CHAR [ RULE CHAR ] ESC 1
2434   COMPOSITION_WITH_ALTCHARS:
2435         ESC 3 ALTCHAR [ ALTCHAR ] ESC 0 CHAR [ CHAR ] ESC 1
2436   COMPOSITION_WITH_RULE_ALTCHARS:
2437         ESC 4 ALTCHAR [ RULE ALTCHAR ] ESC 0 CHAR [ CHAR ] ESC 1 */
2438
2439 enum iso_code_class_type iso_code_class[256];
2440
2441 #define SAFE_CHARSET_P(coding, id)      \
2442   ((id) <= (coding)->max_charset_id     \
2443    && (coding)->safe_charsets[id] >= 0)
2444
2445
2446 #define SHIFT_OUT_OK(category)  \
2447   (CODING_ISO_INITIAL (&coding_categories[category], 1) >= 0)
2448
2449 static void
2450 setup_iso_safe_charsets (attrs)
2451      Lisp_Object attrs;
2452 {
2453   Lisp_Object charset_list, safe_charsets;
2454   Lisp_Object request;
2455   Lisp_Object reg_usage;
2456   Lisp_Object tail;
2457   int reg94, reg96;
2458   int flags = XINT (AREF (attrs, coding_attr_iso_flags));
2459   int max_charset_id;
2460
2461   charset_list = CODING_ATTR_CHARSET_LIST (attrs);
2462   if ((flags & CODING_ISO_FLAG_FULL_SUPPORT)
2463       && ! EQ (charset_list, Viso_2022_charset_list))
2464     {
2465       CODING_ATTR_CHARSET_LIST (attrs)
2466         = charset_list = Viso_2022_charset_list;
2467       ASET (attrs, coding_attr_safe_charsets, Qnil);
2468     }
2469
2470   if (STRINGP (AREF (attrs, coding_attr_safe_charsets)))
2471     return;
2472
2473   max_charset_id = 0;
2474   for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
2475     {
2476       int id = XINT (XCAR (tail));
2477       if (max_charset_id < id)
2478         max_charset_id = id;
2479     }
2480
2481   safe_charsets = Fmake_string (make_number (max_charset_id + 1),
2482                                 make_number (255));
2483   request = AREF (attrs, coding_attr_iso_request);
2484   reg_usage = AREF (attrs, coding_attr_iso_usage);
2485   reg94 = XINT (XCAR (reg_usage));
2486   reg96 = XINT (XCDR (reg_usage));
2487
2488   for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
2489     {
2490       Lisp_Object id;
2491       Lisp_Object reg;
2492       struct charset *charset;
2493
2494       id = XCAR (tail);
2495       charset = CHARSET_FROM_ID (XINT (id));
2496       reg = Fcdr (Fassq (id, request));
2497       if (! NILP (reg))
2498         SSET (safe_charsets, XINT (id), XINT (reg));
2499       else if (charset->iso_chars_96)
2500         {
2501           if (reg96 < 4)
2502             SSET (safe_charsets, XINT (id), reg96);
2503         }
2504       else
2505         {
2506           if (reg94 < 4)
2507             SSET (safe_charsets, XINT (id), reg94);
2508         }
2509     }
2510   ASET (attrs, coding_attr_safe_charsets, safe_charsets);
2511 }
2512
2513
2514 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
2515    Check if a text is encoded in one of ISO-2022 based codig systems.
2516    If it is, return 1, else return 0.  */
2517
2518 static int
2519 detect_coding_iso_2022 (coding, detect_info)
2520      struct coding_system *coding;
2521      struct coding_detection_info *detect_info;
2522 {
2523   const unsigned char *src = coding->source, *src_base = src;
2524   const unsigned char *src_end = coding->source + coding->src_bytes;
2525   int multibytep = coding->src_multibyte;
2526   int single_shifting = 0;
2527   int id;
2528   int c, c1;
2529   int consumed_chars = 0;
2530   int i;
2531   int rejected = 0;
2532   int found = 0;
2533
2534   detect_info->checked |= CATEGORY_MASK_ISO;
2535
2536   for (i = coding_category_iso_7; i <= coding_category_iso_8_else; i++)
2537     {
2538       struct coding_system *this = &(coding_categories[i]);
2539       Lisp_Object attrs, val;
2540
2541       attrs = CODING_ID_ATTRS (this->id);
2542       if (CODING_ISO_FLAGS (this) & CODING_ISO_FLAG_FULL_SUPPORT
2543           && ! EQ (CODING_ATTR_SAFE_CHARSETS (attrs), Viso_2022_charset_list))
2544         setup_iso_safe_charsets (attrs);
2545       val = CODING_ATTR_SAFE_CHARSETS (attrs);
2546       this->max_charset_id = SCHARS (val) - 1;
2547       this->safe_charsets = (char *) SDATA (val);
2548     }
2549
2550   /* A coding system of this category is always ASCII compatible.  */
2551   src += coding->head_ascii;
2552
2553   while (rejected != CATEGORY_MASK_ISO)
2554     {
2555       src_base = src;
2556       ONE_MORE_BYTE (c);
2557       switch (c)
2558         {
2559         case ISO_CODE_ESC:
2560           if (inhibit_iso_escape_detection)
2561             break;
2562           single_shifting = 0;
2563           ONE_MORE_BYTE (c);
2564           if (c >= '(' && c <= '/')
2565             {
2566               /* Designation sequence for a charset of dimension 1.  */
2567               ONE_MORE_BYTE (c1);
2568               if (c1 < ' ' || c1 >= 0x80
2569                   || (id = iso_charset_table[0][c >= ','][c1]) < 0)
2570                 /* Invalid designation sequence.  Just ignore.  */
2571                 break;
2572             }
2573           else if (c == '$')
2574             {
2575               /* Designation sequence for a charset of dimension 2.  */
2576               ONE_MORE_BYTE (c);
2577               if (c >= '@' && c <= 'B')
2578                 /* Designation for JISX0208.1978, GB2312, or JISX0208.  */
2579                 id = iso_charset_table[1][0][c];
2580               else if (c >= '(' && c <= '/')
2581                 {
2582                   ONE_MORE_BYTE (c1);
2583                   if (c1 < ' ' || c1 >= 0x80
2584                       || (id = iso_charset_table[1][c >= ','][c1]) < 0)
2585                     /* Invalid designation sequence.  Just ignore.  */
2586                     break;
2587                 }
2588               else
2589                 /* Invalid designation sequence.  Just ignore it.  */
2590                 break;
2591             }
2592           else if (c == 'N' || c == 'O')
2593             {
2594               /* ESC <Fe> for SS2 or SS3.  */
2595               single_shifting = 1;
2596               rejected |= CATEGORY_MASK_ISO_7BIT | CATEGORY_MASK_ISO_8BIT;
2597               break;
2598             }
2599           else if (c >= '0' && c <= '4')
2600             {
2601               /* ESC <Fp> for start/end composition.  */
2602               found |= CATEGORY_MASK_ISO;
2603               break;
2604             }
2605           else
2606             {
2607               /* Invalid escape sequence.  Just ignore it.  */
2608               break;
2609             }
2610
2611           /* We found a valid designation sequence for CHARSET.  */
2612           rejected |= CATEGORY_MASK_ISO_8BIT;
2613           if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_7],
2614                               id))
2615             found |= CATEGORY_MASK_ISO_7;
2616           else
2617             rejected |= CATEGORY_MASK_ISO_7;
2618           if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_7_tight],
2619                               id))
2620             found |= CATEGORY_MASK_ISO_7_TIGHT;
2621           else
2622             rejected |= CATEGORY_MASK_ISO_7_TIGHT;
2623           if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_7_else],
2624                               id))
2625             found |= CATEGORY_MASK_ISO_7_ELSE;
2626           else
2627             rejected |= CATEGORY_MASK_ISO_7_ELSE;
2628           if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_8_else],
2629                               id))
2630             found |= CATEGORY_MASK_ISO_8_ELSE;
2631           else
2632             rejected |= CATEGORY_MASK_ISO_8_ELSE;
2633           break;
2634
2635         case ISO_CODE_SO:
2636         case ISO_CODE_SI:
2637           /* Locking shift out/in.  */
2638           if (inhibit_iso_escape_detection)
2639             break;
2640           single_shifting = 0;
2641           rejected |= CATEGORY_MASK_ISO_7BIT | CATEGORY_MASK_ISO_8BIT;
2642           found |= CATEGORY_MASK_ISO_ELSE;
2643           break;
2644
2645         case ISO_CODE_CSI:
2646           /* Control sequence introducer.  */
2647           single_shifting = 0;
2648           rejected |= CATEGORY_MASK_ISO_7BIT | CATEGORY_MASK_ISO_7_ELSE;
2649           found |= CATEGORY_MASK_ISO_8_ELSE;
2650           goto check_extra_latin;
2651
2652         case ISO_CODE_SS2:
2653         case ISO_CODE_SS3:
2654           /* Single shift.   */
2655           if (inhibit_iso_escape_detection)
2656             break;
2657           single_shifting = 0;
2658           rejected |= CATEGORY_MASK_ISO_7BIT;
2659           if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_1])
2660               & CODING_ISO_FLAG_SINGLE_SHIFT)
2661             found |= CATEGORY_MASK_ISO_8_1, single_shifting = 1;
2662           if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_2])
2663               & CODING_ISO_FLAG_SINGLE_SHIFT)
2664             found |= CATEGORY_MASK_ISO_8_2, single_shifting = 1;
2665           if (single_shifting)
2666             break;
2667           goto check_extra_latin;
2668
2669         default:
2670           if (c < 0)
2671             continue;
2672           if (c < 0x80)
2673             {
2674               single_shifting = 0;
2675               break;
2676             }
2677           if (c >= 0xA0)
2678             {
2679               rejected |= CATEGORY_MASK_ISO_7BIT | CATEGORY_MASK_ISO_7_ELSE;
2680               found |= CATEGORY_MASK_ISO_8_1;
2681               /* Check the length of succeeding codes of the range
2682                  0xA0..0FF.  If the byte length is even, we include
2683                  CATEGORY_MASK_ISO_8_2 in `found'.  We can check this
2684                  only when we are not single shifting.  */
2685               if (! single_shifting
2686                   && ! (rejected & CATEGORY_MASK_ISO_8_2))
2687                 {
2688                   int i = 1;
2689                   while (src < src_end)
2690                     {
2691                       ONE_MORE_BYTE (c);
2692                       if (c < 0xA0)
2693                         break;
2694                       i++;
2695                     }
2696
2697                   if (i & 1 && src < src_end)
2698                     rejected |= CATEGORY_MASK_ISO_8_2;
2699                   else
2700                     found |= CATEGORY_MASK_ISO_8_2;
2701                 }
2702               break;
2703             }
2704         check_extra_latin:
2705           single_shifting = 0;
2706           if (! VECTORP (Vlatin_extra_code_table)
2707               || NILP (XVECTOR (Vlatin_extra_code_table)->contents[c]))
2708             {
2709               rejected = CATEGORY_MASK_ISO;
2710               break;
2711             }
2712           if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_1])
2713               & CODING_ISO_FLAG_LATIN_EXTRA)
2714             found |= CATEGORY_MASK_ISO_8_1;
2715           else
2716             rejected |= CATEGORY_MASK_ISO_8_1;
2717           rejected |= CATEGORY_MASK_ISO_8_2;
2718         }
2719     }
2720   detect_info->rejected |= CATEGORY_MASK_ISO;
2721   return 0;
2722
2723  no_more_source:
2724   detect_info->rejected |= rejected;
2725   detect_info->found |= (found & ~rejected);
2726   return 1;
2727 }
2728
2729
2730 /* Set designation state into CODING.  */
2731 #define DECODE_DESIGNATION(reg, dim, chars_96, final)                   \
2732   do {                                                                  \
2733     int id, prev;                                                       \
2734                                                                         \
2735     if (final < '0' || final >= 128                                     \
2736         || ((id = ISO_CHARSET_TABLE (dim, chars_96, final)) < 0)        \
2737         || !SAFE_CHARSET_P (coding, id))                                \
2738       {                                                                 \
2739         CODING_ISO_DESIGNATION (coding, reg) = -2;                      \
2740         goto invalid_code;                                              \
2741       }                                                                 \
2742     prev = CODING_ISO_DESIGNATION (coding, reg);                        \
2743     if (id == charset_jisx0201_roman)                                   \
2744       {                                                                 \
2745         if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_USE_ROMAN)      \
2746           id = charset_ascii;                                           \
2747       }                                                                 \
2748     else if (id == charset_jisx0208_1978)                               \
2749       {                                                                 \
2750         if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_USE_OLDJIS)     \
2751           id = charset_jisx0208;                                        \
2752       }                                                                 \
2753     CODING_ISO_DESIGNATION (coding, reg) = id;                          \
2754     /* If there was an invalid designation to REG previously, and this  \
2755        designation is ASCII to REG, we should keep this designation     \
2756        sequence.  */                                                    \
2757     if (prev == -2 && id == charset_ascii)                              \
2758       goto invalid_code;                                                \
2759   } while (0)
2760
2761
2762 #define MAYBE_FINISH_COMPOSITION()                              \
2763   do {                                                          \
2764     int i;                                                      \
2765     if (composition_state == COMPOSING_NO)                      \
2766       break;                                                    \
2767     /* It is assured that we have enough room for producing     \
2768        characters stored in the table `components'.  */         \
2769     if (charbuf + component_idx > charbuf_end)                  \
2770       goto no_more_source;                                      \
2771     composition_state = COMPOSING_NO;                           \
2772     if (method == COMPOSITION_RELATIVE                          \
2773         || method == COMPOSITION_WITH_ALTCHARS)                 \
2774       {                                                         \
2775         for (i = 0; i < component_idx; i++)                     \
2776           *charbuf++ = components[i];                           \
2777         char_offset += component_idx;                           \
2778       }                                                         \
2779     else                                                        \
2780       {                                                         \
2781         for (i = 0; i < component_idx; i += 2)                  \
2782           *charbuf++ = components[i];                           \
2783         char_offset += (component_idx / 2) + 1;                 \
2784       }                                                         \
2785   } while (0)
2786
2787
2788 /* Handle composition start sequence ESC 0, ESC 2, ESC 3, or ESC 4.
2789    ESC 0 : relative composition : ESC 0 CHAR ... ESC 1
2790    ESC 2 : rulebase composition : ESC 2 CHAR RULE CHAR RULE ... CHAR ESC 1
2791    ESC 3 : altchar composition :  ESC 3 CHAR ... ESC 0 CHAR ... ESC 1
2792    ESC 4 : alt&rule composition : ESC 4 CHAR RULE ... CHAR ESC 0 CHAR ... ESC 1
2793   */
2794
2795 #define DECODE_COMPOSITION_START(c1)                                    \
2796   do {                                                                  \
2797     if (c1 == '0'                                                       \
2798         && composition_state == COMPOSING_COMPONENT_RULE)               \
2799       {                                                                 \
2800         component_len = component_idx;                                  \
2801         composition_state = COMPOSING_CHAR;                             \
2802       }                                                                 \
2803     else                                                                \
2804       {                                                                 \
2805         const unsigned char *p;                                         \
2806                                                                         \
2807         MAYBE_FINISH_COMPOSITION ();                                    \
2808         if (charbuf + MAX_COMPOSITION_COMPONENTS > charbuf_end)         \
2809           goto no_more_source;                                          \
2810         for (p = src; p < src_end - 1; p++)                             \
2811           if (*p == ISO_CODE_ESC && p[1] == '1')                        \
2812             break;                                                      \
2813         if (p == src_end - 1)                                           \
2814           {                                                             \
2815             if (coding->mode & CODING_MODE_LAST_BLOCK)                  \
2816               goto invalid_code;                                        \
2817             goto no_more_source;                                        \
2818           }                                                             \
2819                                                                         \
2820         /* This is surely the start of a composition.  */               \
2821         method = (c1 == '0' ? COMPOSITION_RELATIVE                      \
2822                   : c1 == '2' ? COMPOSITION_WITH_RULE                   \
2823                   : c1 == '3' ? COMPOSITION_WITH_ALTCHARS               \
2824                   : COMPOSITION_WITH_RULE_ALTCHARS);                    \
2825         composition_state = (c1 <= '2' ? COMPOSING_CHAR                 \
2826                              : COMPOSING_COMPONENT_CHAR);               \
2827         component_idx = component_len = 0;                              \
2828       }                                                                 \
2829   } while (0)
2830
2831
2832 /* Handle compositoin end sequence ESC 1.  */
2833
2834 #define DECODE_COMPOSITION_END()                                        \
2835   do {                                                                  \
2836     int nchars = (component_len > 0 ? component_idx - component_len     \
2837                   : method == COMPOSITION_RELATIVE ? component_idx      \
2838                   : (component_idx + 1) / 2);                           \
2839     int i;                                                              \
2840     int *saved_charbuf = charbuf;                                       \
2841                                                                         \
2842     ADD_COMPOSITION_DATA (charbuf, nchars, method);                     \
2843     if (method != COMPOSITION_RELATIVE)                                 \
2844       {                                                                 \
2845         if (component_len == 0)                                         \
2846           for (i = 0; i < component_idx; i++)                           \
2847             *charbuf++ = components[i];                                 \
2848         else                                                            \
2849           for (i = 0; i < component_len; i++)                           \
2850             *charbuf++ = components[i];                                 \
2851         *saved_charbuf = saved_charbuf - charbuf;                       \
2852       }                                                                 \
2853     if (method == COMPOSITION_WITH_RULE)                                \
2854       for (i = 0; i < component_idx; i += 2, char_offset++)             \
2855         *charbuf++ = components[i];                                     \
2856     else                                                                \
2857       for (i = component_len; i < component_idx; i++, char_offset++)    \
2858         *charbuf++ = components[i];                                     \
2859     coding->annotated = 1;                                              \
2860     composition_state = COMPOSING_NO;                                   \
2861   } while (0)
2862
2863
2864 /* Decode a composition rule from the byte C1 (and maybe one more byte
2865    from SRC) and store one encoded composition rule in
2866    coding->cmp_data.  */
2867
2868 #define DECODE_COMPOSITION_RULE(c1)                                     \
2869   do {                                                                  \
2870     (c1) -= 32;                                                         \
2871     if (c1 < 81)                /* old format (before ver.21) */        \
2872       {                                                                 \
2873         int gref = (c1) / 9;                                            \
2874         int nref = (c1) % 9;                                            \
2875         if (gref == 4) gref = 10;                                       \
2876         if (nref == 4) nref = 10;                                       \
2877         c1 = COMPOSITION_ENCODE_RULE (gref, nref);                      \
2878       }                                                                 \
2879     else if (c1 < 93)           /* new format (after ver.21) */         \
2880       {                                                                 \
2881         ONE_MORE_BYTE (c2);                                             \
2882         c1 = COMPOSITION_ENCODE_RULE (c1 - 81, c2 - 32);                \
2883       }                                                                 \
2884     else                                                                \
2885       c1 = 0;                                                           \
2886   } while (0)
2887
2888
2889 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".  */
2890
2891 static void
2892 decode_coding_iso_2022 (coding)
2893      struct coding_system *coding;
2894 {
2895   const unsigned char *src = coding->source + coding->consumed;
2896   const unsigned char *src_end = coding->source + coding->src_bytes;
2897   const unsigned char *src_base;
2898   int *charbuf = coding->charbuf + coding->charbuf_used;
2899   int *charbuf_end
2900     = coding->charbuf + coding->charbuf_size - 4 - MAX_ANNOTATION_LENGTH;
2901   int consumed_chars = 0, consumed_chars_base;
2902   int multibytep = coding->src_multibyte;
2903   /* Charsets invoked to graphic plane 0 and 1 respectively.  */
2904   int charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
2905   int charset_id_1 = CODING_ISO_INVOKED_CHARSET (coding, 1);
2906   struct charset *charset;
2907   int c;
2908   /* For handling composition sequence.  */
2909 #define COMPOSING_NO                    0
2910 #define COMPOSING_CHAR                  1
2911 #define COMPOSING_RULE                  2
2912 #define COMPOSING_COMPONENT_CHAR        3
2913 #define COMPOSING_COMPONENT_RULE        4
2914
2915   int composition_state = COMPOSING_NO;
2916   enum composition_method method;
2917   int components[MAX_COMPOSITION_COMPONENTS * 2 + 1];
2918   int component_idx;
2919   int component_len;
2920   Lisp_Object attrs, charset_list;
2921   int char_offset = coding->produced_char;
2922   int last_offset = char_offset;
2923   int last_id = charset_ascii;
2924
2925   CODING_GET_INFO (coding, attrs, charset_list);
2926   setup_iso_safe_charsets (attrs);
2927
2928   while (1)
2929     {
2930       int c1, c2;
2931
2932       src_base = src;
2933       consumed_chars_base = consumed_chars;
2934
2935       if (charbuf >= charbuf_end)
2936         break;
2937
2938       ONE_MORE_BYTE (c1);
2939       if (c1 < 0)
2940         goto invalid_code;
2941
2942       /* We produce at most one character.  */
2943       switch (iso_code_class [c1])
2944         {
2945         case ISO_0x20_or_0x7F:
2946           if (composition_state != COMPOSING_NO)
2947             {
2948               if (composition_state == COMPOSING_RULE
2949                   || composition_state == COMPOSING_COMPONENT_RULE)
2950                 {
2951                   DECODE_COMPOSITION_RULE (c1);
2952                   components[component_idx++] = c1;
2953                   composition_state--;
2954                   continue;
2955                 }
2956             }
2957           if (charset_id_0 < 0
2958               || ! CHARSET_ISO_CHARS_96 (CHARSET_FROM_ID (charset_id_0)))
2959             /* This is SPACE or DEL.  */
2960             charset = CHARSET_FROM_ID (charset_ascii);
2961           else
2962             charset = CHARSET_FROM_ID (charset_id_0);
2963           break;
2964
2965         case ISO_graphic_plane_0:
2966           if (composition_state != COMPOSING_NO)
2967             {
2968               if (composition_state == COMPOSING_RULE
2969                   || composition_state == COMPOSING_COMPONENT_RULE)
2970                 {
2971                   DECODE_COMPOSITION_RULE (c1);
2972                   components[component_idx++] = c1;
2973                   composition_state--;
2974                   continue;
2975                 }
2976             }
2977           charset = CHARSET_FROM_ID (charset_id_0);
2978           break;
2979
2980         case ISO_0xA0_or_0xFF:
2981           if (charset_id_1 < 0
2982               || ! CHARSET_ISO_CHARS_96 (CHARSET_FROM_ID (charset_id_1))
2983               || CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS)
2984             goto invalid_code;
2985           /* This is a graphic character, we fall down ... */
2986
2987         case ISO_graphic_plane_1:
2988           if (charset_id_1 < 0)
2989             goto invalid_code;
2990           charset = CHARSET_FROM_ID (charset_id_1);
2991           break;
2992
2993         case ISO_control_0:
2994           MAYBE_FINISH_COMPOSITION ();
2995           charset = CHARSET_FROM_ID (charset_ascii);
2996           break;
2997
2998         case ISO_control_1:
2999           MAYBE_FINISH_COMPOSITION ();
3000           goto invalid_code;
3001
3002         case ISO_shift_out:
3003           if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LOCKING_SHIFT)
3004               || CODING_ISO_DESIGNATION (coding, 1) < 0)
3005             goto invalid_code;
3006           CODING_ISO_INVOCATION (coding, 0) = 1;
3007           charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
3008           continue;
3009
3010         case ISO_shift_in:
3011           if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LOCKING_SHIFT))
3012             goto invalid_code;
3013           CODING_ISO_INVOCATION (coding, 0) = 0;
3014           charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
3015           continue;
3016
3017         case ISO_single_shift_2_7:
3018         case ISO_single_shift_2:
3019           if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT))
3020             goto invalid_code;
3021           /* SS2 is handled as an escape sequence of ESC 'N' */
3022           c1 = 'N';
3023           goto label_escape_sequence;
3024
3025         case ISO_single_shift_3:
3026           if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT))
3027             goto invalid_code;
3028           /* SS2 is handled as an escape sequence of ESC 'O' */
3029           c1 = 'O';
3030           goto label_escape_sequence;
3031
3032         case ISO_control_sequence_introducer:
3033           /* CSI is handled as an escape sequence of ESC '[' ...  */
3034           c1 = '[';
3035           goto label_escape_sequence;
3036
3037         case ISO_escape:
3038           ONE_MORE_BYTE (c1);
3039         label_escape_sequence:
3040           /* Escape sequences handled here are invocation,
3041              designation, direction specification, and character
3042              composition specification.  */
3043           switch (c1)
3044             {
3045             case '&':           /* revision of following character set */
3046               ONE_MORE_BYTE (c1);
3047               if (!(c1 >= '@' && c1 <= '~'))
3048                 goto invalid_code;
3049               ONE_MORE_BYTE (c1);
3050               if (c1 != ISO_CODE_ESC)
3051                 goto invalid_code;
3052               ONE_MORE_BYTE (c1);
3053               goto label_escape_sequence;
3054
3055             case '$':           /* designation of 2-byte character set */
3056               if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DESIGNATION))
3057                 goto invalid_code;
3058               ONE_MORE_BYTE (c1);
3059               if (c1 >= '@' && c1 <= 'B')
3060                 {       /* designation of JISX0208.1978, GB2312.1980,
3061                            or JISX0208.1980 */
3062                   DECODE_DESIGNATION (0, 2, 0, c1);
3063                 }
3064               else if (c1 >= 0x28 && c1 <= 0x2B)
3065                 {       /* designation of DIMENSION2_CHARS94 character set */
3066                   ONE_MORE_BYTE (c2);
3067                   DECODE_DESIGNATION (c1 - 0x28, 2, 0, c2);
3068                 }
3069               else if (c1 >= 0x2C && c1 <= 0x2F)
3070                 {       /* designation of DIMENSION2_CHARS96 character set */
3071                   ONE_MORE_BYTE (c2);
3072                   DECODE_DESIGNATION (c1 - 0x2C, 2, 1, c2);
3073                 }
3074               else
3075                 goto invalid_code;
3076               /* We must update these variables now.  */
3077               charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
3078               charset_id_1 = CODING_ISO_INVOKED_CHARSET (coding, 1);
3079               continue;
3080
3081             case 'n':           /* invocation of locking-shift-2 */
3082               if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LOCKING_SHIFT)
3083                   || CODING_ISO_DESIGNATION (coding, 2) < 0)
3084                 goto invalid_code;
3085               CODING_ISO_INVOCATION (coding, 0) = 2;
3086               charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
3087               continue;
3088
3089             case 'o':           /* invocation of locking-shift-3 */
3090               if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LOCKING_SHIFT)
3091                   || CODING_ISO_DESIGNATION (coding, 3) < 0)
3092                 goto invalid_code;
3093               CODING_ISO_INVOCATION (coding, 0) = 3;
3094               charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
3095               continue;
3096
3097             case 'N':           /* invocation of single-shift-2 */
3098               if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT)
3099                   || CODING_ISO_DESIGNATION (coding, 2) < 0)
3100                 goto invalid_code;
3101               charset = CHARSET_FROM_ID (CODING_ISO_DESIGNATION (coding, 2));
3102               ONE_MORE_BYTE (c1);
3103               if (c1 < 0x20 || (c1 >= 0x80 && c1 < 0xA0))
3104                 goto invalid_code;
3105               break;
3106
3107             case 'O':           /* invocation of single-shift-3 */
3108               if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT)
3109                   || CODING_ISO_DESIGNATION (coding, 3) < 0)
3110                 goto invalid_code;
3111               charset = CHARSET_FROM_ID (CODING_ISO_DESIGNATION (coding, 3));
3112               ONE_MORE_BYTE (c1);
3113               if (c1 < 0x20 || (c1 >= 0x80 && c1 < 0xA0))
3114                 goto invalid_code;
3115               break;
3116
3117             case '0': case '2': case '3': case '4': /* start composition */
3118               if (! (coding->common_flags & CODING_ANNOTATE_COMPOSITION_MASK))
3119                 goto invalid_code;
3120               DECODE_COMPOSITION_START (c1);
3121               continue;
3122
3123             case '1':           /* end composition */
3124               if (composition_state == COMPOSING_NO)
3125                 goto invalid_code;
3126               DECODE_COMPOSITION_END ();
3127               continue;
3128
3129             case '[':           /* specification of direction */
3130               if (! CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DIRECTION)
3131                 goto invalid_code;
3132               /* For the moment, nested direction is not supported.
3133                  So, `coding->mode & CODING_MODE_DIRECTION' zero means
3134                  left-to-right, and nozero means right-to-left.  */
3135               ONE_MORE_BYTE (c1);
3136               switch (c1)
3137                 {
3138                 case ']':       /* end of the current direction */
3139                   coding->mode &= ~CODING_MODE_DIRECTION;
3140
3141                 case '0':       /* end of the current direction */
3142                 case '1':       /* start of left-to-right direction */
3143                   ONE_MORE_BYTE (c1);
3144                   if (c1 == ']')
3145                     coding->mode &= ~CODING_MODE_DIRECTION;
3146                   else
3147                     goto invalid_code;
3148                   break;
3149
3150                 case '2':       /* start of right-to-left direction */
3151                   ONE_MORE_BYTE (c1);
3152                   if (c1 == ']')
3153                     coding->mode |= CODING_MODE_DIRECTION;
3154                   else
3155                     goto invalid_code;
3156                   break;
3157
3158                 default:
3159                   goto invalid_code;
3160                 }
3161               continue;
3162
3163             case '%':
3164               ONE_MORE_BYTE (c1);
3165               if (c1 == '/')
3166                 {
3167                   /* CTEXT extended segment:
3168                      ESC % / [0-4] M L --ENCODING-NAME-- \002 --BYTES--
3169                      We keep these bytes as is for the moment.
3170                      They may be decoded by post-read-conversion.  */
3171                   int dim, M, L;
3172                   int size;
3173
3174                   ONE_MORE_BYTE (dim);
3175                   ONE_MORE_BYTE (M);
3176                   ONE_MORE_BYTE (L);
3177                   size = ((M - 128) * 128) + (L - 128);
3178                   if (charbuf + 8 + size > charbuf_end)
3179                     goto break_loop;
3180                   *charbuf++ = ISO_CODE_ESC;
3181                   *charbuf++ = '%';
3182                   *charbuf++ = '/';
3183                   *charbuf++ = dim;
3184                   *charbuf++ = BYTE8_TO_CHAR (M);
3185                   *charbuf++ = BYTE8_TO_CHAR (L);
3186                   while (size-- > 0)
3187                     {
3188                       ONE_MORE_BYTE (c1);
3189                       *charbuf++ = ASCII_BYTE_P (c1) ? c1 : BYTE8_TO_CHAR (c1);
3190                     }
3191                 }
3192               else if (c1 == 'G')
3193                 {
3194                   /* XFree86 extension for embedding UTF-8 in CTEXT:
3195                      ESC % G --UTF-8-BYTES-- ESC % @
3196                      We keep these bytes as is for the moment.
3197                      They may be decoded by post-read-conversion.  */
3198                   int *p = charbuf;
3199
3200                   if (p + 6 > charbuf_end)
3201                     goto break_loop;
3202                   *p++ = ISO_CODE_ESC;
3203                   *p++ = '%';
3204                   *p++ = 'G';
3205                   while (p < charbuf_end)
3206                     {
3207                       ONE_MORE_BYTE (c1);
3208                       if (c1 == ISO_CODE_ESC
3209                           && src + 1 < src_end
3210                           && src[0] == '%'
3211                           && src[1] == '@')
3212                         break;
3213                       *p++ = ASCII_BYTE_P (c1) ? c1 : BYTE8_TO_CHAR (c1);
3214                     }
3215                   if (p + 3 > charbuf_end)
3216                     goto break_loop;
3217                   *p++ = ISO_CODE_ESC;
3218                   *p++ = '%';
3219                   *p++ = '@';
3220                   charbuf = p;
3221                 }
3222               else
3223                 goto invalid_code;
3224               continue;
3225               break;
3226
3227             default:
3228               if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DESIGNATION))
3229                 goto invalid_code;
3230               if (c1 >= 0x28 && c1 <= 0x2B)
3231                 {       /* designation of DIMENSION1_CHARS94 character set */
3232                   ONE_MORE_BYTE (c2);
3233                   DECODE_DESIGNATION (c1 - 0x28, 1, 0, c2);
3234                 }
3235               else if (c1 >= 0x2C && c1 <= 0x2F)
3236                 {       /* designation of DIMENSION1_CHARS96 character set */
3237                   ONE_MORE_BYTE (c2);
3238                   DECODE_DESIGNATION (c1 - 0x2C, 1, 1, c2);
3239                 }
3240               else
3241                 goto invalid_code;
3242               /* We must update these variables now.  */
3243               charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
3244               charset_id_1 = CODING_ISO_INVOKED_CHARSET (coding, 1);
3245               continue;
3246             }
3247         }
3248
3249       if (charset->id != charset_ascii
3250           && last_id != charset->id)
3251         {
3252           if (last_id != charset_ascii)
3253             ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
3254           last_id = charset->id;
3255           last_offset = char_offset;
3256         }
3257
3258       /* Now we know CHARSET and 1st position code C1 of a character.
3259          Produce a decoded character while getting 2nd position code
3260          C2 if necessary.  */
3261       c1 &= 0x7F;
3262       if (CHARSET_DIMENSION (charset) > 1)
3263         {
3264           ONE_MORE_BYTE (c2);
3265           if (c2 < 0x20 || (c2 >= 0x80 && c2 < 0xA0))
3266             /* C2 is not in a valid range.  */
3267             goto invalid_code;
3268           c1 = (c1 << 8) | (c2 & 0x7F);
3269           if (CHARSET_DIMENSION (charset) > 2)
3270             {
3271               ONE_MORE_BYTE (c2);
3272               if (c2 < 0x20 || (c2 >= 0x80 && c2 < 0xA0))
3273                 /* C2 is not in a valid range.  */
3274                 goto invalid_code;
3275               c1 = (c1 << 8) | (c2 & 0x7F);
3276             }
3277         }
3278
3279       CODING_DECODE_CHAR (coding, src, src_base, src_end, charset, c1, c);
3280       if (c < 0)
3281         {
3282           MAYBE_FINISH_COMPOSITION ();
3283           for (; src_base < src; src_base++, char_offset++)
3284             {
3285               if (ASCII_BYTE_P (*src_base))
3286                 *charbuf++ = *src_base;
3287               else
3288                 *charbuf++ = BYTE8_TO_CHAR (*src_base);
3289             }
3290         }
3291       else if (composition_state == COMPOSING_NO)
3292         {
3293           *charbuf++ = c;
3294           char_offset++;
3295         }
3296       else
3297         {
3298           components[component_idx++] = c;
3299           if (method == COMPOSITION_WITH_RULE
3300               || (method == COMPOSITION_WITH_RULE_ALTCHARS
3301                   && composition_state == COMPOSING_COMPONENT_CHAR))
3302             composition_state++;
3303         }
3304       continue;
3305
3306     invalid_code:
3307       MAYBE_FINISH_COMPOSITION ();
3308       src = src_base;
3309       consumed_chars = consumed_chars_base;
3310       ONE_MORE_BYTE (c);
3311       *charbuf++ = c < 0 ? -c : ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
3312       char_offset++;
3313       coding->errors++;
3314       continue;
3315
3316     break_loop:
3317       break;
3318     }
3319
3320  no_more_source:
3321   if (last_id != charset_ascii)
3322     ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
3323   coding->consumed_char += consumed_chars_base;
3324   coding->consumed = src_base - coding->source;
3325   coding->charbuf_used = charbuf - coding->charbuf;
3326 }
3327
3328
3329 /* ISO2022 encoding stuff.  */
3330
3331 /*
3332    It is not enough to say just "ISO2022" on encoding, we have to
3333    specify more details.  In Emacs, each coding system of ISO2022
3334    variant has the following specifications:
3335         1. Initial designation to G0 thru G3.
3336         2. Allows short-form designation?
3337         3. ASCII should be designated to G0 before control characters?
3338         4. ASCII should be designated to G0 at end of line?
3339         5. 7-bit environment or 8-bit environment?
3340         6. Use locking-shift?
3341         7. Use Single-shift?
3342    And the following two are only for Japanese:
3343         8. Use ASCII in place of JIS0201-1976-Roman?
3344         9. Use JISX0208-1983 in place of JISX0208-1978?
3345    These specifications are encoded in CODING_ISO_FLAGS (coding) as flag bits
3346    defined by macros CODING_ISO_FLAG_XXX.  See `coding.h' for more
3347    details.
3348 */
3349
3350 /* Produce codes (escape sequence) for designating CHARSET to graphic
3351    register REG at DST, and increment DST.  If <final-char> of CHARSET is
3352    '@', 'A', or 'B' and the coding system CODING allows, produce
3353    designation sequence of short-form.  */
3354
3355 #define ENCODE_DESIGNATION(charset, reg, coding)                        \
3356   do {                                                                  \
3357     unsigned char final_char = CHARSET_ISO_FINAL (charset);             \
3358     char *intermediate_char_94 = "()*+";                                \
3359     char *intermediate_char_96 = ",-./";                                \
3360     int revision = -1;                                                  \
3361     int c;                                                              \
3362                                                                         \
3363     if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_REVISION)           \
3364       revision = CHARSET_ISO_REVISION (charset);                        \
3365                                                                         \
3366     if (revision >= 0)                                                  \
3367       {                                                                 \
3368         EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, '&');                       \
3369         EMIT_ONE_BYTE ('@' + revision);                                 \
3370       }                                                                 \
3371     EMIT_ONE_ASCII_BYTE (ISO_CODE_ESC);                                 \
3372     if (CHARSET_DIMENSION (charset) == 1)                               \
3373       {                                                                 \
3374         if (! CHARSET_ISO_CHARS_96 (charset))                           \
3375           c = intermediate_char_94[reg];                                \
3376         else                                                            \
3377           c = intermediate_char_96[reg];                                \
3378         EMIT_ONE_ASCII_BYTE (c);                                        \
3379       }                                                                 \
3380     else                                                                \
3381       {                                                                 \
3382         EMIT_ONE_ASCII_BYTE ('$');                                      \
3383         if (! CHARSET_ISO_CHARS_96 (charset))                           \
3384           {                                                             \
3385             if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LONG_FORM   \
3386                 || reg != 0                                             \
3387                 || final_char < '@' || final_char > 'B')                \
3388               EMIT_ONE_ASCII_BYTE (intermediate_char_94[reg]);          \
3389           }                                                             \
3390         else                                                            \
3391           EMIT_ONE_ASCII_BYTE (intermediate_char_96[reg]);              \
3392       }                                                                 \
3393     EMIT_ONE_ASCII_BYTE (final_char);                                   \
3394                                                                         \
3395     CODING_ISO_DESIGNATION (coding, reg) = CHARSET_ID (charset);        \
3396   } while (0)
3397
3398
3399 /* The following two macros produce codes (control character or escape
3400    sequence) for ISO2022 single-shift functions (single-shift-2 and
3401    single-shift-3).  */
3402
3403 #define ENCODE_SINGLE_SHIFT_2                                           \
3404   do {                                                                  \
3405     if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS)         \
3406       EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'N');                         \
3407     else                                                                \
3408       EMIT_ONE_BYTE (ISO_CODE_SS2);                                     \
3409     CODING_ISO_SINGLE_SHIFTING (coding) = 1;                            \
3410   } while (0)
3411
3412
3413 #define ENCODE_SINGLE_SHIFT_3                                           \
3414   do {                                                                  \
3415     if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS)         \
3416       EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'O');                         \
3417     else                                                                \
3418       EMIT_ONE_BYTE (ISO_CODE_SS3);                                     \
3419     CODING_ISO_SINGLE_SHIFTING (coding) = 1;                            \
3420   } while (0)
3421
3422
3423 /* The following four macros produce codes (control character or
3424    escape sequence) for ISO2022 locking-shift functions (shift-in,
3425    shift-out, locking-shift-2, and locking-shift-3).  */
3426
3427 #define ENCODE_SHIFT_IN                                 \
3428   do {                                                  \
3429     EMIT_ONE_ASCII_BYTE (ISO_CODE_SI);                  \
3430     CODING_ISO_INVOCATION (coding, 0) = 0;              \
3431   } while (0)
3432
3433
3434 #define ENCODE_SHIFT_OUT                                \
3435   do {                                                  \
3436     EMIT_ONE_ASCII_BYTE (ISO_CODE_SO);                  \
3437     CODING_ISO_INVOCATION (coding, 0) = 1;              \
3438   } while (0)
3439
3440
3441 #define ENCODE_LOCKING_SHIFT_2                          \
3442   do {                                                  \
3443     EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'n');           \
3444     CODING_ISO_INVOCATION (coding, 0) = 2;              \
3445   } while (0)
3446
3447
3448 #define ENCODE_LOCKING_SHIFT_3                          \
3449   do {                                                  \
3450     EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'n');           \
3451     CODING_ISO_INVOCATION (coding, 0) = 3;              \
3452   } while (0)
3453
3454
3455 /* Produce codes for a DIMENSION1 character whose character set is
3456    CHARSET and whose position-code is C1.  Designation and invocation
3457    sequences are also produced in advance if necessary.  */
3458
3459 #define ENCODE_ISO_CHARACTER_DIMENSION1(charset, c1)                    \
3460   do {                                                                  \
3461     int id = CHARSET_ID (charset);                                      \
3462                                                                         \
3463     if ((CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_USE_ROMAN)         \
3464         && id == charset_ascii)                                         \
3465       {                                                                 \
3466         id = charset_jisx0201_roman;                                    \
3467         charset = CHARSET_FROM_ID (id);                                 \
3468       }                                                                 \
3469                                                                         \
3470     if (CODING_ISO_SINGLE_SHIFTING (coding))                            \
3471       {                                                                 \
3472         if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS)     \
3473           EMIT_ONE_ASCII_BYTE (c1 & 0x7F);                              \
3474         else                                                            \
3475           EMIT_ONE_BYTE (c1 | 0x80);                                    \
3476         CODING_ISO_SINGLE_SHIFTING (coding) = 0;                        \
3477         break;                                                          \
3478       }                                                                 \
3479     else if (id == CODING_ISO_INVOKED_CHARSET (coding, 0))              \
3480       {                                                                 \
3481         EMIT_ONE_ASCII_BYTE (c1 & 0x7F);                                \
3482         break;                                                          \
3483       }                                                                 \
3484     else if (id == CODING_ISO_INVOKED_CHARSET (coding, 1))              \
3485       {                                                                 \
3486         EMIT_ONE_BYTE (c1 | 0x80);                                      \
3487         break;                                                          \
3488       }                                                                 \
3489     else                                                                \
3490       /* Since CHARSET is not yet invoked to any graphic planes, we     \
3491          must invoke it, or, at first, designate it to some graphic     \
3492          register.  Then repeat the loop to actually produce the        \
3493          character.  */                                                 \
3494       dst = encode_invocation_designation (charset, coding, dst,        \
3495                                            &produced_chars);            \
3496   } while (1)
3497
3498
3499 /* Produce codes for a DIMENSION2 character whose character set is
3500    CHARSET and whose position-codes are C1 and C2.  Designation and
3501    invocation codes are also produced in advance if necessary.  */
3502
3503 #define ENCODE_ISO_CHARACTER_DIMENSION2(charset, c1, c2)                \
3504   do {                                                                  \
3505     int id = CHARSET_ID (charset);                                      \
3506                                                                         \
3507     if ((CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_USE_OLDJIS)        \
3508         && id == charset_jisx0208)                                      \
3509       {                                                                 \
3510         id = charset_jisx0208_1978;                                     \
3511         charset = CHARSET_FROM_ID (id);                                 \
3512       }                                                                 \
3513                                                                         \
3514     if (CODING_ISO_SINGLE_SHIFTING (coding))                            \
3515       {                                                                 \
3516         if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS)     \
3517           EMIT_TWO_ASCII_BYTES ((c1) & 0x7F, (c2) & 0x7F);              \
3518         else                                                            \
3519           EMIT_TWO_BYTES ((c1) | 0x80, (c2) | 0x80);                    \
3520         CODING_ISO_SINGLE_SHIFTING (coding) = 0;                        \
3521         break;                                                          \
3522       }                                                                 \
3523     else if (id == CODING_ISO_INVOKED_CHARSET (coding, 0))              \
3524       {                                                                 \
3525         EMIT_TWO_ASCII_BYTES ((c1) & 0x7F, (c2) & 0x7F);                \
3526         break;                                                          \
3527       }                                                                 \
3528     else if (id == CODING_ISO_INVOKED_CHARSET (coding, 1))              \
3529       {                                                                 \
3530         EMIT_TWO_BYTES ((c1) | 0x80, (c2) | 0x80);                      \
3531         break;                                                          \
3532       }                                                                 \
3533     else                                                                \
3534       /* Since CHARSET is not yet invoked to any graphic planes, we     \
3535          must invoke it, or, at first, designate it to some graphic     \
3536          register.  Then repeat the loop to actually produce the        \
3537          character.  */                                                 \
3538       dst = encode_invocation_designation (charset, coding, dst,        \
3539                                            &produced_chars);            \
3540   } while (1)
3541
3542
3543 #define ENCODE_ISO_CHARACTER(charset, c)                                   \
3544   do {                                                                     \
3545     int code = ENCODE_CHAR ((charset),(c));                                \
3546                                                                            \
3547     if (CHARSET_DIMENSION (charset) == 1)                                  \
3548       ENCODE_ISO_CHARACTER_DIMENSION1 ((charset), code);                   \
3549     else                                                                   \
3550       ENCODE_ISO_CHARACTER_DIMENSION2 ((charset), code >> 8, code & 0xFF); \
3551   } while (0)
3552
3553
3554 /* Produce designation and invocation codes at a place pointed by DST
3555    to use CHARSET.  The element `spec.iso_2022' of *CODING is updated.
3556    Return new DST.  */
3557
3558 unsigned char *
3559 encode_invocation_designation (charset, coding, dst, p_nchars)
3560      struct charset *charset;
3561      struct coding_system *coding;
3562      unsigned char *dst;
3563      int *p_nchars;
3564 {
3565   int multibytep = coding->dst_multibyte;
3566   int produced_chars = *p_nchars;
3567   int reg;                      /* graphic register number */
3568   int id = CHARSET_ID (charset);
3569
3570   /* At first, check designations.  */
3571   for (reg = 0; reg < 4; reg++)
3572     if (id == CODING_ISO_DESIGNATION (coding, reg))
3573       break;
3574
3575   if (reg >= 4)
3576     {
3577       /* CHARSET is not yet designated to any graphic registers.  */
3578       /* At first check the requested designation.  */
3579       reg = CODING_ISO_REQUEST (coding, id);
3580       if (reg < 0)
3581         /* Since CHARSET requests no special designation, designate it
3582            to graphic register 0.  */
3583         reg = 0;
3584
3585       ENCODE_DESIGNATION (charset, reg, coding);
3586     }
3587
3588   if (CODING_ISO_INVOCATION (coding, 0) != reg
3589       && CODING_ISO_INVOCATION (coding, 1) != reg)
3590     {
3591       /* Since the graphic register REG is not invoked to any graphic
3592          planes, invoke it to graphic plane 0.  */
3593       switch (reg)
3594         {
3595         case 0:                 /* graphic register 0 */
3596           ENCODE_SHIFT_IN;
3597           break;
3598
3599         case 1:                 /* graphic register 1 */
3600           ENCODE_SHIFT_OUT;
3601           break;
3602
3603         case 2:                 /* graphic register 2 */
3604           if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT)
3605             ENCODE_SINGLE_SHIFT_2;
3606           else
3607             ENCODE_LOCKING_SHIFT_2;
3608           break;
3609
3610         case 3:                 /* graphic register 3 */
3611           if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT)
3612             ENCODE_SINGLE_SHIFT_3;
3613           else
3614             ENCODE_LOCKING_SHIFT_3;
3615           break;
3616         }
3617     }
3618
3619   *p_nchars = produced_chars;
3620   return dst;
3621 }
3622
3623 /* The following three macros produce codes for indicating direction
3624    of text.  */
3625 #define ENCODE_CONTROL_SEQUENCE_INTRODUCER                              \
3626   do {                                                                  \
3627     if (CODING_ISO_FLAGS (coding) == CODING_ISO_FLAG_SEVEN_BITS)        \
3628       EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, '[');                         \
3629     else                                                                \
3630       EMIT_ONE_BYTE (ISO_CODE_CSI);                                     \
3631   } while (0)
3632
3633
3634 #define ENCODE_DIRECTION_R2L()                  \
3635   do {                                          \
3636     ENCODE_CONTROL_SEQUENCE_INTRODUCER (dst);   \
3637     EMIT_TWO_ASCII_BYTES ('2', ']');            \
3638   } while (0)
3639
3640
3641 #define ENCODE_DIRECTION_L2R()                  \
3642   do {                                          \
3643     ENCODE_CONTROL_SEQUENCE_INTRODUCER (dst);   \
3644     EMIT_TWO_ASCII_BYTES ('0', ']');            \
3645   } while (0)
3646
3647
3648 /* Produce codes for designation and invocation to reset the graphic
3649    planes and registers to initial state.  */
3650 #define ENCODE_RESET_PLANE_AND_REGISTER()                               \
3651   do {                                                                  \
3652     int reg;                                                            \
3653     struct charset *charset;                                            \
3654                                                                         \
3655     if (CODING_ISO_INVOCATION (coding, 0) != 0)                         \
3656       ENCODE_SHIFT_IN;                                                  \
3657     for (reg = 0; reg < 4; reg++)                                       \
3658       if (CODING_ISO_INITIAL (coding, reg) >= 0                         \
3659           && (CODING_ISO_DESIGNATION (coding, reg)                      \
3660               != CODING_ISO_INITIAL (coding, reg)))                     \
3661         {                                                               \
3662           charset = CHARSET_FROM_ID (CODING_ISO_INITIAL (coding, reg)); \
3663           ENCODE_DESIGNATION (charset, reg, coding);                    \
3664         }                                                               \
3665   } while (0)
3666
3667
3668 /* Produce designation sequences of charsets in the line started from
3669    SRC to a place pointed by DST, and return updated DST.
3670
3671    If the current block ends before any end-of-line, we may fail to
3672    find all the necessary designations.  */
3673
3674 static unsigned char *
3675 encode_designation_at_bol (coding, charbuf, charbuf_end, dst)
3676      struct coding_system *coding;
3677      int *charbuf, *charbuf_end;
3678      unsigned char *dst;
3679 {
3680   struct charset *charset;
3681   /* Table of charsets to be designated to each graphic register.  */
3682   int r[4];
3683   int c, found = 0, reg;
3684   int produced_chars = 0;
3685   int multibytep = coding->dst_multibyte;
3686   Lisp_Object attrs;
3687   Lisp_Object charset_list;
3688
3689   attrs = CODING_ID_ATTRS (coding->id);
3690   charset_list = CODING_ATTR_CHARSET_LIST (attrs);
3691   if (EQ (charset_list, Qiso_2022))
3692     charset_list = Viso_2022_charset_list;
3693
3694   for (reg = 0; reg < 4; reg++)
3695     r[reg] = -1;
3696
3697   while (found < 4)
3698     {
3699       int id;
3700
3701       c = *charbuf++;
3702       if (c == '\n')
3703         break;
3704       charset = char_charset (c, charset_list, NULL);
3705       id = CHARSET_ID (charset);
3706       reg = CODING_ISO_REQUEST (coding, id);
3707       if (reg >= 0 && r[reg] < 0)
3708         {
3709           found++;
3710           r[reg] = id;
3711         }
3712     }
3713
3714   if (found)
3715     {
3716       for (reg = 0; reg < 4; reg++)
3717         if (r[reg] >= 0
3718             && CODING_ISO_DESIGNATION (coding, reg) != r[reg])
3719           ENCODE_DESIGNATION (CHARSET_FROM_ID (r[reg]), reg, coding);
3720     }
3721
3722   return dst;
3723 }
3724
3725 /* See the above "GENERAL NOTES on `encode_coding_XXX ()' functions".  */
3726
3727 static int
3728 encode_coding_iso_2022 (coding)
3729      struct coding_system *coding;
3730 {
3731   int multibytep = coding->dst_multibyte;
3732   int *charbuf = coding->charbuf;
3733   int *charbuf_end = charbuf + coding->charbuf_used;
3734   unsigned char *dst = coding->destination + coding->produced;
3735   unsigned char *dst_end = coding->destination + coding->dst_bytes;
3736   int safe_room = 16;
3737   int bol_designation
3738     = (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DESIGNATE_AT_BOL
3739        && CODING_ISO_BOL (coding));
3740   int produced_chars = 0;
3741   Lisp_Object attrs, eol_type, charset_list;
3742   int ascii_compatible;
3743   int c;
3744   int preferred_charset_id = -1;
3745
3746   CODING_GET_INFO (coding, attrs, charset_list);
3747   eol_type = CODING_ID_EOL_TYPE (coding->id);
3748   if (VECTORP (eol_type))
3749     eol_type = Qunix;
3750
3751   setup_iso_safe_charsets (attrs);
3752   /* Charset list may have been changed.  */
3753   charset_list = CODING_ATTR_CHARSET_LIST (attrs);              \
3754   coding->safe_charsets = (char *) SDATA (CODING_ATTR_SAFE_CHARSETS(attrs));
3755
3756   ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
3757
3758   while (charbuf < charbuf_end)
3759     {
3760       ASSURE_DESTINATION (safe_room);
3761
3762       if (bol_designation)
3763         {
3764           unsigned char *dst_prev = dst;
3765
3766           /* We have to produce designation sequences if any now.  */
3767           dst = encode_designation_at_bol (coding, charbuf, charbuf_end, dst);
3768           bol_designation = 0;
3769           /* We are sure that designation sequences are all ASCII bytes.  */
3770           produced_chars += dst - dst_prev;
3771         }
3772
3773       c = *charbuf++;
3774
3775       if (c < 0)
3776         {
3777           /* Handle an annotation.  */
3778           switch (*charbuf)
3779             {
3780             case CODING_ANNOTATE_COMPOSITION_MASK:
3781               /* Not yet implemented.  */
3782               break;
3783             case CODING_ANNOTATE_CHARSET_MASK:
3784               preferred_charset_id = charbuf[3];
3785               if (preferred_charset_id >= 0
3786                   && NILP (Fmemq (make_number (preferred_charset_id),
3787                                   charset_list)))
3788                 preferred_charset_id = -1;
3789               break;
3790             default:
3791               abort ();
3792             }
3793           charbuf += -c - 1;
3794           continue;
3795         }
3796
3797       /* Now encode the character C.  */
3798       if (c < 0x20 || c == 0x7F)
3799         {
3800           if (c == '\n'
3801               || (c == '\r' && EQ (eol_type, Qmac)))
3802             {
3803               if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_RESET_AT_EOL)
3804                 ENCODE_RESET_PLANE_AND_REGISTER ();
3805               if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_INIT_AT_BOL)
3806                 {
3807                   int i;
3808
3809                   for (i = 0; i < 4; i++)
3810                     CODING_ISO_DESIGNATION (coding, i)
3811                       = CODING_ISO_INITIAL (coding, i);
3812                 }
3813               bol_designation
3814                 = CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DESIGNATE_AT_BOL;
3815             }
3816           else if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_RESET_AT_CNTL)
3817             ENCODE_RESET_PLANE_AND_REGISTER ();
3818           EMIT_ONE_ASCII_BYTE (c);
3819         }
3820       else if (ASCII_CHAR_P (c))
3821         {
3822           if (ascii_compatible)
3823             EMIT_ONE_ASCII_BYTE (c);
3824           else
3825             {
3826               struct charset *charset = CHARSET_FROM_ID (charset_ascii);
3827               ENCODE_ISO_CHARACTER (charset, c);
3828             }
3829         }
3830       else if (CHAR_BYTE8_P (c))
3831         {
3832           c = CHAR_TO_BYTE8 (c);
3833           EMIT_ONE_BYTE (c);
3834         }
3835       else
3836         {
3837           struct charset *charset;
3838
3839           if (preferred_charset_id >= 0)
3840             {
3841               charset = CHARSET_FROM_ID (preferred_charset_id);
3842               if (! CHAR_CHARSET_P (c, charset))
3843                 charset = char_charset (c, charset_list, NULL);
3844             }
3845           else
3846             charset = char_charset (c, charset_list, NULL);
3847           if (!charset)
3848             {
3849               if (coding->mode & CODING_MODE_SAFE_ENCODING)
3850                 {
3851                   c = CODING_INHIBIT_CHARACTER_SUBSTITUTION;
3852                   charset = CHARSET_FROM_ID (charset_ascii);
3853                 }
3854               else
3855                 {
3856                   c = coding->default_char;
3857                   charset = char_charset (c, charset_list, NULL);
3858                 }
3859             }
3860           ENCODE_ISO_CHARACTER (charset, c);
3861         }
3862     }
3863
3864   if (coding->mode & CODING_MODE_LAST_BLOCK
3865       && CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_RESET_AT_EOL)
3866     {
3867       ASSURE_DESTINATION (safe_room);
3868       ENCODE_RESET_PLANE_AND_REGISTER ();
3869     }
3870   record_conversion_result (coding, CODING_RESULT_SUCCESS);
3871   CODING_ISO_BOL (coding) = bol_designation;
3872   coding->produced_char += produced_chars;
3873   coding->produced = dst - coding->destination;
3874   return 0;
3875 }
3876
3877 \f
3878 /*** 8,9. SJIS and BIG5 handlers ***/
3879
3880 /* Although SJIS and BIG5 are not ISO's coding system, they are used
3881    quite widely.  So, for the moment, Emacs supports them in the bare
3882    C code.  But, in the future, they may be supported only by CCL.  */
3883
3884 /* SJIS is a coding system encoding three character sets: ASCII, right
3885    half of JISX0201-Kana, and JISX0208.  An ASCII character is encoded
3886    as is.  A character of charset katakana-jisx0201 is encoded by
3887    "position-code + 0x80".  A character of charset japanese-jisx0208
3888    is encoded in 2-byte but two position-codes are divided and shifted
3889    so that it fit in the range below.
3890
3891    --- CODE RANGE of SJIS ---
3892    (character set)      (range)
3893    ASCII                0x00 .. 0x7F
3894    KATAKANA-JISX0201    0xA0 .. 0xDF
3895    JISX0208 (1st byte)  0x81 .. 0x9F and 0xE0 .. 0xEF
3896             (2nd byte)  0x40 .. 0x7E and 0x80 .. 0xFC
3897    -------------------------------
3898
3899 */
3900
3901 /* BIG5 is a coding system encoding two character sets: ASCII and
3902    Big5.  An ASCII character is encoded as is.  Big5 is a two-byte
3903    character set and is encoded in two-byte.
3904
3905    --- CODE RANGE of BIG5 ---
3906    (character set)      (range)
3907    ASCII                0x00 .. 0x7F
3908    Big5 (1st byte)      0xA1 .. 0xFE
3909         (2nd byte)      0x40 .. 0x7E and 0xA1 .. 0xFE
3910    --------------------------
3911
3912   */
3913
3914 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
3915    Check if a text is encoded in SJIS.  If it is, return
3916    CATEGORY_MASK_SJIS, else return 0.  */
3917
3918 static int
3919 detect_coding_sjis (coding, detect_info)
3920      struct coding_system *coding;
3921      struct coding_detection_info *detect_info;
3922 {
3923   const unsigned char *src = coding->source, *src_base;
3924   const unsigned char *src_end = coding->source + coding->src_bytes;
3925   int multibytep = coding->src_multibyte;
3926   int consumed_chars = 0;
3927   int found = 0;
3928   int c;
3929
3930   detect_info->checked |= CATEGORY_MASK_SJIS;
3931   /* A coding system of this category is always ASCII compatible.  */
3932   src += coding->head_ascii;
3933
3934   while (1)
3935     {
3936       src_base = src;
3937       ONE_MORE_BYTE (c);
3938       if (c < 0x80)
3939         continue;
3940       if ((c >= 0x81 && c <= 0x9F) || (c >= 0xE0 && c <= 0xEF))
3941         {
3942           ONE_MORE_BYTE (c);
3943           if (c < 0x40 || c == 0x7F || c > 0xFC)
3944             break;
3945           found = CATEGORY_MASK_SJIS;
3946         }
3947       else if (c >= 0xA0 && c < 0xE0)
3948         found = CATEGORY_MASK_SJIS;
3949       else
3950         break;
3951     }
3952   detect_info->rejected |= CATEGORY_MASK_SJIS;
3953   return 0;
3954
3955  no_more_source:
3956   if (src_base < src && coding->mode & CODING_MODE_LAST_BLOCK)
3957     {
3958       detect_info->rejected |= CATEGORY_MASK_SJIS;
3959       return 0;
3960     }
3961   detect_info->found |= found;
3962   return 1;
3963 }
3964
3965 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
3966    Check if a text is encoded in BIG5.  If it is, return
3967    CATEGORY_MASK_BIG5, else return 0.  */
3968
3969 static int
3970 detect_coding_big5 (coding, detect_info)
3971      struct coding_system *coding;
3972      struct coding_detection_info *detect_info;
3973 {
3974   const unsigned char *src = coding->source, *src_base;
3975   const unsigned char *src_end = coding->source + coding->src_bytes;
3976   int multibytep = coding->src_multibyte;
3977   int consumed_chars = 0;
3978   int found = 0;
3979   int c;
3980
3981   detect_info->checked |= CATEGORY_MASK_BIG5;
3982   /* A coding system of this category is always ASCII compatible.  */
3983   src += coding->head_ascii;
3984
3985   while (1)
3986     {
3987       src_base = src;
3988       ONE_MORE_BYTE (c);
3989       if (c < 0x80)
3990         continue;
3991       if (c >= 0xA1)
3992         {
3993           ONE_MORE_BYTE (c);
3994           if (c < 0x40 || (c >= 0x7F && c <= 0xA0))
3995             return 0;
3996           found = CATEGORY_MASK_BIG5;
3997         }
3998       else
3999         break;
4000     }
4001   detect_info->rejected |= CATEGORY_MASK_BIG5;
4002   return 0;
4003
4004  no_more_source:
4005   if (src_base < src && coding->mode & CODING_MODE_LAST_BLOCK)
4006     {
4007       detect_info->rejected |= CATEGORY_MASK_BIG5;
4008       return 0;
4009     }
4010   detect_info->found |= found;
4011   return 1;
4012 }
4013
4014 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".
4015    If SJIS_P is 1, decode SJIS text, else decode BIG5 test.  */
4016
4017 static void
4018 decode_coding_sjis (coding)
4019      struct coding_system *coding;
4020 {
4021   const unsigned char *src = coding->source + coding->consumed;
4022   const unsigned char *src_end = coding->source + coding->src_bytes;
4023   const unsigned char *src_base;
4024   int *charbuf = coding->charbuf + coding->charbuf_used;
4025   int *charbuf_end
4026     = coding->charbuf + coding->charbuf_size - MAX_ANNOTATION_LENGTH;
4027   int consumed_chars = 0, consumed_chars_base;
4028   int multibytep = coding->src_multibyte;
4029   struct charset *charset_roman, *charset_kanji, *charset_kana;
4030   struct charset *charset_kanji2;
4031   Lisp_Object attrs, charset_list, val;
4032   int char_offset = coding->produced_char;
4033   int last_offset = char_offset;
4034   int last_id = charset_ascii;
4035
4036   CODING_GET_INFO (coding, attrs, charset_list);
4037
4038   val = charset_list;
4039   charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
4040   charset_kana = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
4041   charset_kanji = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
4042   charset_kanji2 = NILP (val) ? NULL : CHARSET_FROM_ID (XINT (XCAR (val)));
4043
4044   while (1)
4045     {
4046       int c, c1;
4047       struct charset *charset;
4048
4049       src_base = src;
4050       consumed_chars_base = consumed_chars;
4051
4052       if (charbuf >= charbuf_end)
4053         break;
4054
4055       ONE_MORE_BYTE (c);
4056       if (c < 0)
4057         goto invalid_code;
4058       if (c < 0x80)
4059         charset = charset_roman;
4060       else if (c == 0x80 || c == 0xA0)
4061         goto invalid_code;
4062       else if (c >= 0xA1 && c <= 0xDF)
4063         {
4064           /* SJIS -> JISX0201-Kana */
4065           c &= 0x7F;
4066           charset = charset_kana;
4067         }
4068       else if (c <= 0xEF)
4069         {
4070           /* SJIS -> JISX0208 */
4071           ONE_MORE_BYTE (c1);
4072           if (c1 < 0x40 || c1 == 0x7F || c1 > 0xFC)
4073             goto invalid_code;
4074           c = (c << 8) | c1;
4075           SJIS_TO_JIS (c);
4076           charset = charset_kanji;
4077         }
4078       else if (c <= 0xFC && charset_kanji2)
4079         {
4080           /* SJIS -> JISX0213-2 */
4081           ONE_MORE_BYTE (c1);
4082           if (c1 < 0x40 || c1 == 0x7F || c1 > 0xFC)
4083             goto invalid_code;
4084           c = (c << 8) | c1;
4085           SJIS_TO_JIS2 (c);
4086           charset = charset_kanji2;
4087         }
4088       else
4089         goto invalid_code;
4090       if (charset->id != charset_ascii
4091           && last_id != charset->id)
4092         {
4093           if (last_id != charset_ascii)
4094             ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
4095           last_id = charset->id;
4096           last_offset = char_offset;
4097         }
4098       CODING_DECODE_CHAR (coding, src, src_base, src_end, charset, c, c);
4099       *charbuf++ = c;
4100       char_offset++;
4101       continue;
4102
4103     invalid_code:
4104       src = src_base;
4105       consumed_chars = consumed_chars_base;
4106       ONE_MORE_BYTE (c);
4107       *charbuf++ = c < 0 ? -c : BYTE8_TO_CHAR (c);
4108       char_offset++;
4109       coding->errors++;
4110     }
4111
4112  no_more_source:
4113   if (last_id != charset_ascii)
4114     ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
4115   coding->consumed_char += consumed_chars_base;
4116   coding->consumed = src_base - coding->source;
4117   coding->charbuf_used = charbuf - coding->charbuf;
4118 }
4119
4120 static void
4121 decode_coding_big5 (coding)
4122      struct coding_system *coding;
4123 {
4124   const unsigned char *src = coding->source + coding->consumed;
4125   const unsigned char *src_end = coding->source + coding->src_bytes;
4126   const unsigned char *src_base;
4127   int *charbuf = coding->charbuf + coding->charbuf_used;
4128   int *charbuf_end
4129     = coding->charbuf + coding->charbuf_size - MAX_ANNOTATION_LENGTH;
4130   int consumed_chars = 0, consumed_chars_base;
4131   int multibytep = coding->src_multibyte;
4132   struct charset *charset_roman, *charset_big5;
4133   Lisp_Object attrs, charset_list, val;
4134   int char_offset = coding->produced_char;
4135   int last_offset = char_offset;
4136   int last_id = charset_ascii;
4137
4138   CODING_GET_INFO (coding, attrs, charset_list);
4139   val = charset_list;
4140   charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
4141   charset_big5 = CHARSET_FROM_ID (XINT (XCAR (val)));
4142
4143   while (1)
4144     {
4145       int c, c1;
4146       struct charset *charset;
4147
4148       src_base = src;
4149       consumed_chars_base = consumed_chars;
4150
4151       if (charbuf >= charbuf_end)
4152         break;
4153
4154       ONE_MORE_BYTE (c);
4155
4156       if (c < 0)
4157         goto invalid_code;
4158       if (c < 0x80)
4159         charset = charset_roman;
4160       else
4161         {
4162           /* BIG5 -> Big5 */
4163           if (c < 0xA1 || c > 0xFE)
4164             goto invalid_code;
4165           ONE_MORE_BYTE (c1);
4166           if (c1 < 0x40 || (c1 > 0x7E && c1 < 0xA1) || c1 > 0xFE)
4167             goto invalid_code;
4168           c = c << 8 | c1;
4169           charset = charset_big5;
4170         }
4171       if (charset->id != charset_ascii
4172           && last_id != charset->id)
4173         {
4174           if (last_id != charset_ascii)
4175             ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
4176           last_id = charset->id;
4177           last_offset = char_offset;
4178         }
4179       CODING_DECODE_CHAR (coding, src, src_base, src_end, charset, c, c);
4180       *charbuf++ = c;
4181       char_offset++;
4182       continue;
4183
4184     invalid_code:
4185       src = src_base;
4186       consumed_chars = consumed_chars_base;
4187       ONE_MORE_BYTE (c);
4188       *charbuf++ = c < 0 ? -c : BYTE8_TO_CHAR (c);
4189       char_offset++;
4190       coding->errors++;
4191     }
4192
4193  no_more_source:
4194   if (last_id != charset_ascii)
4195     ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
4196   coding->consumed_char += consumed_chars_base;
4197   coding->consumed = src_base - coding->source;
4198   coding->charbuf_used = charbuf - coding->charbuf;
4199 }
4200
4201 /* See the above "GENERAL NOTES on `encode_coding_XXX ()' functions".
4202    This function can encode charsets `ascii', `katakana-jisx0201',
4203    `japanese-jisx0208', `chinese-big5-1', and `chinese-big5-2'.  We
4204    are sure that all these charsets are registered as official charset
4205    (i.e. do not have extended leading-codes).  Characters of other
4206    charsets are produced without any encoding.  If SJIS_P is 1, encode
4207    SJIS text, else encode BIG5 text.  */
4208
4209 static int
4210 encode_coding_sjis (coding)
4211      struct coding_system *coding;
4212 {
4213   int multibytep = coding->dst_multibyte;
4214   int *charbuf = coding->charbuf;
4215   int *charbuf_end = charbuf + coding->charbuf_used;
4216   unsigned char *dst = coding->destination + coding->produced;
4217   unsigned char *dst_end = coding->destination + coding->dst_bytes;
4218   int safe_room = 4;
4219   int produced_chars = 0;
4220   Lisp_Object attrs, charset_list, val;
4221   int ascii_compatible;
4222   struct charset *charset_roman, *charset_kanji, *charset_kana;
4223   struct charset *charset_kanji2;
4224   int c;
4225
4226   CODING_GET_INFO (coding, attrs, charset_list);
4227   val = charset_list;
4228   charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
4229   charset_kana = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
4230   charset_kanji = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
4231   charset_kanji2 = NILP (val) ? NULL : CHARSET_FROM_ID (XINT (XCAR (val)));
4232
4233   ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
4234
4235   while (charbuf < charbuf_end)
4236     {
4237       ASSURE_DESTINATION (safe_room);
4238       c = *charbuf++;
4239       /* Now encode the character C.  */
4240       if (ASCII_CHAR_P (c) && ascii_compatible)
4241         EMIT_ONE_ASCII_BYTE (c);
4242       else if (CHAR_BYTE8_P (c))
4243         {
4244           c = CHAR_TO_BYTE8 (c);
4245           EMIT_ONE_BYTE (c);
4246         }
4247       else
4248         {
4249           unsigned code;
4250           struct charset *charset = char_charset (c, charset_list, &code);
4251
4252           if (!charset)
4253             {
4254               if (coding->mode & CODING_MODE_SAFE_ENCODING)
4255                 {
4256                   code = CODING_INHIBIT_CHARACTER_SUBSTITUTION;
4257                   charset = CHARSET_FROM_ID (charset_ascii);
4258                 }
4259               else
4260                 {
4261                   c = coding->default_char;
4262                   charset = char_charset (c, charset_list, &code);
4263                 }
4264             }
4265           if (code == CHARSET_INVALID_CODE (charset))
4266             abort ();
4267           if (charset == charset_kanji)
4268             {
4269               int c1, c2;
4270               JIS_TO_SJIS (code);
4271               c1 = code >> 8, c2 = code & 0xFF;
4272               EMIT_TWO_BYTES (c1, c2);
4273             }
4274           else if (charset == charset_kana)
4275             EMIT_ONE_BYTE (code | 0x80);
4276           else if (charset_kanji2 && charset == charset_kanji2)
4277             {
4278               int c1, c2;
4279
4280               c1 = code >> 8;
4281               if (c1 == 0x21 || (c1 >= 0x23 && c1 < 0x25)
4282                   || (c1 >= 0x2C && c1 <= 0x2F) || c1 >= 0x6E)
4283                 {
4284                   JIS_TO_SJIS2 (code);
4285                   c1 = code >> 8, c2 = code & 0xFF;
4286                   EMIT_TWO_BYTES (c1, c2);
4287                 }
4288               else
4289                 EMIT_ONE_ASCII_BYTE (code & 0x7F);
4290             }
4291           else
4292             EMIT_ONE_ASCII_BYTE (code & 0x7F);
4293         }
4294     }
4295   record_conversion_result (coding, CODING_RESULT_SUCCESS);
4296   coding->produced_char += produced_chars;
4297   coding->produced = dst - coding->destination;
4298   return 0;
4299 }
4300
4301 static int
4302 encode_coding_big5 (coding)
4303      struct coding_system *coding;
4304 {
4305   int multibytep = coding->dst_multibyte;
4306   int *charbuf = coding->charbuf;
4307   int *charbuf_end = charbuf + coding->charbuf_used;
4308   unsigned char *dst = coding->destination + coding->produced;
4309   unsigned char *dst_end = coding->destination + coding->dst_bytes;
4310   int safe_room = 4;
4311   int produced_chars = 0;
4312   Lisp_Object attrs, charset_list, val;
4313   int ascii_compatible;
4314   struct charset *charset_roman, *charset_big5;
4315   int c;
4316
4317   CODING_GET_INFO (coding, attrs, charset_list);
4318   val = charset_list;
4319   charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
4320   charset_big5 = CHARSET_FROM_ID (XINT (XCAR (val)));
4321   ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
4322
4323   while (charbuf < charbuf_end)
4324     {
4325       ASSURE_DESTINATION (safe_room);
4326       c = *charbuf++;
4327       /* Now encode the character C.  */
4328       if (ASCII_CHAR_P (c) && ascii_compatible)
4329         EMIT_ONE_ASCII_BYTE (c);
4330       else if (CHAR_BYTE8_P (c))
4331         {
4332           c = CHAR_TO_BYTE8 (c);
4333           EMIT_ONE_BYTE (c);
4334         }
4335       else
4336         {
4337           unsigned code;
4338           struct charset *charset = char_charset (c, charset_list, &code);
4339
4340           if (! charset)
4341             {
4342               if (coding->mode & CODING_MODE_SAFE_ENCODING)
4343                 {
4344                   code = CODING_INHIBIT_CHARACTER_SUBSTITUTION;
4345                   charset = CHARSET_FROM_ID (charset_ascii);
4346                 }
4347               else
4348                 {
4349                   c = coding->default_char;
4350                   charset = char_charset (c, charset_list, &code);
4351                 }
4352             }
4353           if (code == CHARSET_INVALID_CODE (charset))
4354             abort ();
4355           if (charset == charset_big5)
4356             {
4357               int c1, c2;
4358
4359               c1 = code >> 8, c2 = code & 0xFF;
4360               EMIT_TWO_BYTES (c1, c2);
4361             }
4362           else
4363             EMIT_ONE_ASCII_BYTE (code & 0x7F);
4364         }
4365     }
4366   record_conversion_result (coding, CODING_RESULT_SUCCESS);
4367   coding->produced_char += produced_chars;
4368   coding->produced = dst - coding->destination;
4369   return 0;
4370 }
4371
4372 \f
4373 /*** 10. CCL handlers ***/
4374
4375 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
4376    Check if a text is encoded in a coding system of which
4377    encoder/decoder are written in CCL program.  If it is, return
4378    CATEGORY_MASK_CCL, else return 0.  */
4379
4380 static int
4381 detect_coding_ccl (coding, detect_info)
4382      struct coding_system *coding;
4383      struct coding_detection_info *detect_info;
4384 {
4385   const unsigned char *src = coding->source, *src_base;
4386   const unsigned char *src_end = coding->source + coding->src_bytes;
4387   int multibytep = coding->src_multibyte;
4388   int consumed_chars = 0;
4389   int found = 0;
4390   unsigned char *valids;
4391   int head_ascii = coding->head_ascii;
4392   Lisp_Object attrs;
4393
4394   detect_info->checked |= CATEGORY_MASK_CCL;
4395
4396   coding = &coding_categories[coding_category_ccl];
4397   valids = CODING_CCL_VALIDS (coding);
4398   attrs = CODING_ID_ATTRS (coding->id);
4399   if (! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
4400     src += head_ascii;
4401
4402   while (1)
4403     {
4404       int c;
4405
4406       src_base = src;
4407       ONE_MORE_BYTE (c);
4408       if (c < 0 || ! valids[c])
4409         break;
4410       if ((valids[c] > 1))
4411         found = CATEGORY_MASK_CCL;
4412     }
4413   detect_info->rejected |= CATEGORY_MASK_CCL;
4414   return 0;
4415
4416  no_more_source:
4417   detect_info->found |= found;
4418   return 1;
4419 }
4420
4421 static void
4422 decode_coding_ccl (coding)
4423      struct coding_system *coding;
4424 {
4425   const unsigned char *src = coding->source + coding->consumed;
4426   const unsigned char *src_end = coding->source + coding->src_bytes;
4427   int *charbuf = coding->charbuf + coding->charbuf_used;
4428   int *charbuf_end = coding->charbuf + coding->charbuf_size;
4429   int consumed_chars = 0;
4430   int multibytep = coding->src_multibyte;
4431   struct ccl_program ccl;
4432   int source_charbuf[1024];
4433   int source_byteidx[1024];
4434   Lisp_Object attrs, charset_list;
4435
4436   CODING_GET_INFO (coding, attrs, charset_list);
4437   setup_ccl_program (&ccl, CODING_CCL_DECODER (coding));
4438
4439   while (src < src_end)
4440     {
4441       const unsigned char *p = src;
4442       int *source, *source_end;
4443       int i = 0;
4444
4445       if (multibytep)
4446         while (i < 1024 && p < src_end)
4447           {
4448             source_byteidx[i] = p - src;
4449             source_charbuf[i++] = STRING_CHAR_ADVANCE (p);
4450           }
4451       else
4452         while (i < 1024 && p < src_end)
4453           source_charbuf[i++] = *p++;
4454
4455       if (p == src_end && coding->mode & CODING_MODE_LAST_BLOCK)
4456         ccl.last_block = 1;
4457
4458       source = source_charbuf;
4459       source_end = source + i;
4460       while (source < source_end)
4461         {
4462           ccl_driver (&ccl, source, charbuf,
4463                       source_end - source, charbuf_end - charbuf,
4464                       charset_list);
4465           source += ccl.consumed;
4466           charbuf += ccl.produced;
4467           if (ccl.status != CCL_STAT_SUSPEND_BY_DST)
4468             break;
4469         }
4470       if (source < source_end)
4471         src += source_byteidx[source - source_charbuf];
4472       else
4473         src = p;
4474       consumed_chars += source - source_charbuf;
4475
4476       if (ccl.status != CCL_STAT_SUSPEND_BY_SRC
4477           && ccl.status != CODING_RESULT_INSUFFICIENT_SRC)
4478         break;
4479     }
4480
4481   switch (ccl.status)
4482     {
4483     case CCL_STAT_SUSPEND_BY_SRC:
4484       record_conversion_result (coding, CODING_RESULT_INSUFFICIENT_SRC);
4485       break;
4486     case CCL_STAT_SUSPEND_BY_DST:
4487       break;
4488     case CCL_STAT_QUIT:
4489     case CCL_STAT_INVALID_CMD:
4490       record_conversion_result (coding, CODING_RESULT_INTERRUPT);
4491       break;
4492     default:
4493       record_conversion_result (coding, CODING_RESULT_SUCCESS);
4494       break;
4495     }
4496   coding->consumed_char += consumed_chars;
4497   coding->consumed = src - coding->source;
4498   coding->charbuf_used = charbuf - coding->charbuf;
4499 }
4500
4501 static int
4502 encode_coding_ccl (coding)
4503      struct coding_system *coding;
4504 {
4505   struct ccl_program ccl;
4506   int multibytep = coding->dst_multibyte;
4507   int *charbuf = coding->charbuf;
4508   int *charbuf_end = charbuf + coding->charbuf_used;
4509   unsigned char *dst = coding->destination + coding->produced;
4510   unsigned char *dst_end = coding->destination + coding->dst_bytes;
4511   unsigned char *adjusted_dst_end = dst_end - 1;
4512   int destination_charbuf[1024];
4513   int i, produced_chars = 0;
4514   Lisp_Object attrs, charset_list;
4515
4516   CODING_GET_INFO (coding, attrs, charset_list);
4517   setup_ccl_program (&ccl, CODING_CCL_ENCODER (coding));
4518
4519   ccl.last_block = coding->mode & CODING_MODE_LAST_BLOCK;
4520   ccl.dst_multibyte = coding->dst_multibyte;
4521
4522   while (charbuf < charbuf_end && dst < adjusted_dst_end)
4523     {
4524       int dst_bytes = dst_end - dst;
4525       if (dst_bytes > 1024)
4526         dst_bytes = 1024;
4527
4528       ccl_driver (&ccl, charbuf, destination_charbuf,
4529                   charbuf_end - charbuf, dst_bytes, charset_list);
4530       charbuf += ccl.consumed;
4531       if (multibytep)
4532         for (i = 0; i < ccl.produced; i++)
4533           EMIT_ONE_BYTE (destination_charbuf[i] & 0xFF);
4534       else
4535         {
4536           for (i = 0; i < ccl.produced; i++)
4537             *dst++ = destination_charbuf[i] & 0xFF;
4538           produced_chars += ccl.produced;
4539         }
4540     }
4541
4542   switch (ccl.status)
4543     {
4544     case CCL_STAT_SUSPEND_BY_SRC:
4545       record_conversion_result (coding, CODING_RESULT_INSUFFICIENT_SRC);
4546       break;
4547     case CCL_STAT_SUSPEND_BY_DST:
4548       record_conversion_result (coding, CODING_RESULT_INSUFFICIENT_DST);
4549       break;
4550     case CCL_STAT_QUIT:
4551     case CCL_STAT_INVALID_CMD:
4552       record_conversion_result (coding, CODING_RESULT_INTERRUPT);
4553       break;
4554     default:
4555       record_conversion_result (coding, CODING_RESULT_SUCCESS);
4556       break;
4557     }
4558
4559   coding->produced_char += produced_chars;
4560   coding->produced = dst - coding->destination;
4561   return 0;
4562 }
4563
4564
4565 \f
4566 /*** 10, 11. no-conversion handlers ***/
4567
4568 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".  */
4569
4570 static void
4571 decode_coding_raw_text (coding)
4572      struct coding_system *coding;
4573 {
4574   coding->chars_at_source = 1;
4575   coding->consumed_char = 0;
4576   coding->consumed = 0;
4577   record_conversion_result (coding, CODING_RESULT_SUCCESS);
4578 }
4579
4580 static int
4581 encode_coding_raw_text (coding)
4582      struct coding_system *coding;
4583 {
4584   int multibytep = coding->dst_multibyte;
4585   int *charbuf = coding->charbuf;
4586   int *charbuf_end = coding->charbuf + coding->charbuf_used;
4587   unsigned char *dst = coding->destination + coding->produced;
4588   unsigned char *dst_end = coding->destination + coding->dst_bytes;
4589   int produced_chars = 0;
4590   int c;
4591
4592   if (multibytep)
4593     {
4594       int safe_room = MAX_MULTIBYTE_LENGTH * 2;
4595
4596       if (coding->src_multibyte)
4597         while (charbuf < charbuf_end)
4598           {
4599             ASSURE_DESTINATION (safe_room);
4600             c = *charbuf++;
4601             if (ASCII_CHAR_P (c))
4602               EMIT_ONE_ASCII_BYTE (c);
4603             else if (CHAR_BYTE8_P (c))
4604               {
4605                 c = CHAR_TO_BYTE8 (c);
4606                 EMIT_ONE_BYTE (c);
4607               }
4608             else
4609               {
4610                 unsigned char str[MAX_MULTIBYTE_LENGTH], *p0 = str, *p1 = str;
4611
4612                 CHAR_STRING_ADVANCE (c, p1);
4613                 while (p0 < p1)
4614                   {
4615                     EMIT_ONE_BYTE (*p0);
4616                     p0++;
4617                   }
4618               }
4619           }
4620       else
4621         while (charbuf < charbuf_end)
4622           {
4623             ASSURE_DESTINATION (safe_room);
4624             c = *charbuf++;
4625             EMIT_ONE_BYTE (c);
4626           }
4627     }
4628   else
4629     {
4630       if (coding->src_multibyte)
4631         {
4632           int safe_room = MAX_MULTIBYTE_LENGTH;
4633
4634           while (charbuf < charbuf_end)
4635             {
4636               ASSURE_DESTINATION (safe_room);
4637               c = *charbuf++;
4638               if (ASCII_CHAR_P (c))
4639                 *dst++ = c;
4640               else if (CHAR_BYTE8_P (c))
4641                 *dst++ = CHAR_TO_BYTE8 (c);
4642               else
4643                 CHAR_STRING_ADVANCE (c, dst);
4644               produced_chars++;
4645             }
4646         }
4647       else
4648         {
4649           ASSURE_DESTINATION (charbuf_end - charbuf);
4650           while (charbuf < charbuf_end && dst < dst_end)
4651             *dst++ = *charbuf++;
4652           produced_chars = dst - (coding->destination + coding->dst_bytes);
4653         }
4654     }
4655   record_conversion_result (coding, CODING_RESULT_SUCCESS);
4656   coding->produced_char += produced_chars;
4657   coding->produced = dst - coding->destination;
4658   return 0;
4659 }
4660
4661 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
4662    Check if a text is encoded in a charset-based coding system.  If it
4663    is, return 1, else return 0.  */
4664
4665 static int
4666 detect_coding_charset (coding, detect_info)
4667      struct coding_system *coding;
4668      struct coding_detection_info *detect_info;
4669 {
4670   const unsigned char *src = coding->source, *src_base;
4671   const unsigned char *src_end = coding->source + coding->src_bytes;
4672   int multibytep = coding->src_multibyte;
4673   int consumed_chars = 0;
4674   Lisp_Object attrs, valids;
4675   int found = 0;
4676
4677   detect_info->checked |= CATEGORY_MASK_CHARSET;
4678
4679   coding = &coding_categories[coding_category_charset];
4680   attrs = CODING_ID_ATTRS (coding->id);
4681   valids = AREF (attrs, coding_attr_charset_valids);
4682
4683   if (! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
4684     src += coding->head_ascii;
4685
4686   while (1)
4687     {
4688       int c;
4689
4690       src_base = src;
4691       ONE_MORE_BYTE (c);
4692       if (c < 0)
4693         continue;
4694       if (NILP (AREF (valids, c)))
4695         break;
4696       if (c >= 0x80)
4697         found = CATEGORY_MASK_CHARSET;
4698     }
4699   detect_info->rejected |= CATEGORY_MASK_CHARSET;
4700   return 0;
4701
4702  no_more_source:
4703   detect_info->found |= found;
4704   return 1;
4705 }
4706
4707 static void
4708 decode_coding_charset (coding)
4709      struct coding_system *coding;
4710 {
4711   const unsigned char *src = coding->source + coding->consumed;
4712   const unsigned char *src_end = coding->source + coding->src_bytes;
4713   const unsigned char *src_base;
4714   int *charbuf = coding->charbuf + coding->charbuf_used;
4715   int *charbuf_end
4716     = coding->charbuf + coding->charbuf_size - MAX_ANNOTATION_LENGTH;
4717   int consumed_chars = 0, consumed_chars_base;
4718   int multibytep = coding->src_multibyte;
4719   Lisp_Object attrs, charset_list, valids;
4720   int char_offset = coding->produced_char;
4721   int last_offset = char_offset;
4722   int last_id = charset_ascii;
4723
4724   CODING_GET_INFO (coding, attrs, charset_list);
4725   valids = AREF (attrs, coding_attr_charset_valids);
4726
4727   while (1)
4728     {
4729       int c;
4730       Lisp_Object val;
4731       struct charset *charset;
4732       int dim;
4733       int len = 1;
4734       unsigned code;
4735
4736       src_base = src;
4737       consumed_chars_base = consumed_chars;
4738
4739       if (charbuf >= charbuf_end)
4740         break;
4741
4742       ONE_MORE_BYTE (c);
4743       if (c < 0)
4744         goto invalid_code;
4745       code = c;
4746
4747       val = AREF (valids, c);
4748       if (NILP (val))
4749         goto invalid_code;
4750       if (INTEGERP (val))
4751         {
4752           charset = CHARSET_FROM_ID (XFASTINT (val));
4753           dim = CHARSET_DIMENSION (charset);
4754           while (len < dim)
4755             {
4756               ONE_MORE_BYTE (c);
4757               code = (code << 8) | c;
4758               len++;
4759             }
4760           CODING_DECODE_CHAR (coding, src, src_base, src_end,
4761                               charset, code, c);
4762         }
4763       else
4764         {
4765           /* VAL is a list of charset IDs.  It is assured that the
4766              list is sorted by charset dimensions (smaller one
4767              comes first).  */
4768           while (CONSP (val))
4769             {
4770               charset = CHARSET_FROM_ID (XFASTINT (XCAR (val)));
4771               dim = CHARSET_DIMENSION (charset);
4772               while (len < dim)
4773                 {
4774                   ONE_MORE_BYTE (c);
4775                   code = (code << 8) | c;
4776                   len++;
4777                 }
4778               CODING_DECODE_CHAR (coding, src, src_base,
4779                                   src_end, charset, code, c);
4780               if (c >= 0)
4781                 break;
4782               val = XCDR (val);
4783             }
4784         }
4785       if (c < 0)
4786         goto invalid_code;
4787       if (charset->id != charset_ascii
4788           && last_id != charset->id)
4789         {
4790           if (last_id != charset_ascii)
4791             ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
4792           last_id = charset->id;
4793           last_offset = char_offset;
4794         }
4795
4796       *charbuf++ = c;
4797       char_offset++;
4798       continue;
4799
4800     invalid_code:
4801       src = src_base;
4802       consumed_chars = consumed_chars_base;
4803       ONE_MORE_BYTE (c);
4804       *charbuf++ = c < 0 ? -c : ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
4805       char_offset++;
4806       coding->errors++;
4807     }
4808
4809  no_more_source:
4810   if (last_id != charset_ascii)
4811     ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
4812   coding->consumed_char += consumed_chars_base;
4813   coding->consumed = src_base - coding->source;
4814   coding->charbuf_used = charbuf - coding->charbuf;
4815 }
4816
4817 static int
4818 encode_coding_charset (coding)
4819      struct coding_system *coding;
4820 {
4821   int multibytep = coding->dst_multibyte;
4822   int *charbuf = coding->charbuf;
4823   int *charbuf_end = charbuf + coding->charbuf_used;
4824   unsigned char *dst = coding->destination + coding->produced;
4825   unsigned char *dst_end = coding->destination + coding->dst_bytes;
4826   int safe_room = MAX_MULTIBYTE_LENGTH;
4827   int produced_chars = 0;
4828   Lisp_Object attrs, charset_list;
4829   int ascii_compatible;
4830   int c;
4831
4832   CODING_GET_INFO (coding, attrs, charset_list);
4833   ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
4834
4835   while (charbuf < charbuf_end)
4836     {
4837       struct charset *charset;
4838       unsigned code;
4839
4840       ASSURE_DESTINATION (safe_room);
4841       c = *charbuf++;
4842       if (ascii_compatible && ASCII_CHAR_P (c))
4843         EMIT_ONE_ASCII_BYTE (c);
4844       else if (CHAR_BYTE8_P (c))
4845         {
4846           c = CHAR_TO_BYTE8 (c);
4847           EMIT_ONE_BYTE (c);
4848         }
4849       else
4850         {
4851           charset = char_charset (c, charset_list, &code);
4852           if (charset)
4853             {
4854               if (CHARSET_DIMENSION (charset) == 1)
4855                 EMIT_ONE_BYTE (code);
4856               else if (CHARSET_DIMENSION (charset) == 2)
4857                 EMIT_TWO_BYTES (code >> 8, code & 0xFF);
4858               else if (CHARSET_DIMENSION (charset) == 3)
4859                 EMIT_THREE_BYTES (code >> 16, (code >> 8) & 0xFF, code & 0xFF);
4860               else
4861                 EMIT_FOUR_BYTES (code >> 24, (code >> 16) & 0xFF,
4862                                  (code >> 8) & 0xFF, code & 0xFF);
4863             }
4864           else
4865             {
4866               if (coding->mode & CODING_MODE_SAFE_ENCODING)
4867                 c = CODING_INHIBIT_CHARACTER_SUBSTITUTION;
4868               else
4869                 c = coding->default_char;
4870               EMIT_ONE_BYTE (c);
4871             }
4872         }
4873     }
4874
4875   record_conversion_result (coding, CODING_RESULT_SUCCESS);
4876   coding->produced_char += produced_chars;
4877   coding->produced = dst - coding->destination;
4878   return 0;
4879 }
4880
4881 \f
4882 /*** 7. C library functions ***/
4883
4884 /* Setup coding context CODING from information about CODING_SYSTEM.
4885    If CODING_SYSTEM is nil, `no-conversion' is assumed.  If
4886    CODING_SYSTEM is invalid, signal an error.  */
4887
4888 void
4889 setup_coding_system (coding_system, coding)
4890      Lisp_Object coding_system;
4891      struct coding_system *coding;
4892 {
4893   Lisp_Object attrs;
4894   Lisp_Object eol_type;
4895   Lisp_Object coding_type;
4896   Lisp_Object val;
4897
4898   if (NILP (coding_system))
4899     coding_system = Qno_conversion;
4900
4901   CHECK_CODING_SYSTEM_GET_ID (coding_system, coding->id);
4902
4903   attrs = CODING_ID_ATTRS (coding->id);
4904   eol_type = CODING_ID_EOL_TYPE (coding->id);
4905
4906   coding->mode = 0;
4907   coding->head_ascii = -1;
4908   coding->common_flags
4909     = (VECTORP (eol_type) ? CODING_REQUIRE_DETECTION_MASK : 0);
4910   if (! NILP (CODING_ATTR_POST_READ (attrs)))
4911     coding->common_flags |= CODING_REQUIRE_DECODING_MASK;
4912   if (! NILP (CODING_ATTR_PRE_WRITE (attrs)))
4913     coding->common_flags |= CODING_REQUIRE_ENCODING_MASK;
4914   if (! NILP (CODING_ATTR_FOR_UNIBYTE (attrs)))
4915     coding->common_flags |= CODING_FOR_UNIBYTE_MASK;
4916
4917   val = CODING_ATTR_SAFE_CHARSETS (attrs);
4918   coding->max_charset_id = SCHARS (val) - 1;
4919   coding->safe_charsets = (char *) SDATA (val);
4920   coding->default_char = XINT (CODING_ATTR_DEFAULT_CHAR (attrs));
4921
4922   coding_type = CODING_ATTR_TYPE (attrs);
4923   if (EQ (coding_type, Qundecided))
4924     {
4925       coding->detector = NULL;
4926       coding->decoder = decode_coding_raw_text;
4927       coding->encoder = encode_coding_raw_text;
4928       coding->common_flags |= CODING_REQUIRE_DETECTION_MASK;
4929     }
4930   else if (EQ (coding_type, Qiso_2022))
4931     {
4932       int i;
4933       int flags = XINT (AREF (attrs, coding_attr_iso_flags));
4934
4935       /* Invoke graphic register 0 to plane 0.  */
4936       CODING_ISO_INVOCATION (coding, 0) = 0;
4937       /* Invoke graphic register 1 to plane 1 if we can use 8-bit.  */
4938       CODING_ISO_INVOCATION (coding, 1)
4939         = (flags & CODING_ISO_FLAG_SEVEN_BITS ? -1 : 1);
4940       /* Setup the initial status of designation.  */
4941       for (i = 0; i < 4; i++)
4942         CODING_ISO_DESIGNATION (coding, i) = CODING_ISO_INITIAL (coding, i);
4943       /* Not single shifting initially.  */
4944       CODING_ISO_SINGLE_SHIFTING (coding) = 0;
4945       /* Beginning of buffer should also be regarded as bol. */
4946       CODING_ISO_BOL (coding) = 1;
4947       coding->detector = detect_coding_iso_2022;
4948       coding->decoder = decode_coding_iso_2022;
4949       coding->encoder = encode_coding_iso_2022;
4950       if (flags & CODING_ISO_FLAG_SAFE)
4951         coding->mode |= CODING_MODE_SAFE_ENCODING;
4952       coding->common_flags
4953         |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK
4954             | CODING_REQUIRE_FLUSHING_MASK);
4955       if (flags & CODING_ISO_FLAG_COMPOSITION)
4956         coding->common_flags |= CODING_ANNOTATE_COMPOSITION_MASK;
4957       if (flags & CODING_ISO_FLAG_DESIGNATION)
4958         coding->common_flags |= CODING_ANNOTATE_CHARSET_MASK;
4959       if (flags & CODING_ISO_FLAG_FULL_SUPPORT)
4960         {
4961           setup_iso_safe_charsets (attrs);
4962           val = CODING_ATTR_SAFE_CHARSETS (attrs);
4963           coding->max_charset_id = SCHARS (val) - 1;
4964           coding->safe_charsets = (char *) SDATA (val);
4965         }
4966       CODING_ISO_FLAGS (coding) = flags;
4967     }
4968   else if (EQ (coding_type, Qcharset))
4969     {
4970       coding->detector = detect_coding_charset;
4971       coding->decoder = decode_coding_charset;
4972       coding->encoder = encode_coding_charset;
4973       coding->common_flags
4974         |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
4975     }
4976   else if (EQ (coding_type, Qutf_8))
4977     {
4978       coding->detector = detect_coding_utf_8;
4979       coding->decoder = decode_coding_utf_8;
4980       coding->encoder = encode_coding_utf_8;
4981       coding->common_flags
4982         |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
4983     }
4984   else if (EQ (coding_type, Qutf_16))
4985     {
4986       val = AREF (attrs, coding_attr_utf_16_bom);
4987       CODING_UTF_16_BOM (coding) = (CONSP (val) ? utf_16_detect_bom
4988                                     : EQ (val, Qt) ? utf_16_with_bom
4989                                     : utf_16_without_bom);
4990       val = AREF (attrs, coding_attr_utf_16_endian);
4991       CODING_UTF_16_ENDIAN (coding) = (EQ (val, Qbig) ? utf_16_big_endian
4992                                        : utf_16_little_endian);
4993       CODING_UTF_16_SURROGATE (coding) = 0;
4994       coding->detector = detect_coding_utf_16;
4995       coding->decoder = decode_coding_utf_16;
4996       coding->encoder = encode_coding_utf_16;
4997       coding->common_flags
4998         |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
4999       if (CODING_UTF_16_BOM (coding) == utf_16_detect_bom)
5000         coding->common_flags |= CODING_REQUIRE_DETECTION_MASK;
5001     }
5002   else if (EQ (coding_type, Qccl))
5003     {
5004       coding->detector = detect_coding_ccl;
5005       coding->decoder = decode_coding_ccl;
5006       coding->encoder = encode_coding_ccl;
5007       coding->common_flags
5008         |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK
5009             | CODING_REQUIRE_FLUSHING_MASK);
5010     }
5011   else if (EQ (coding_type, Qemacs_mule))
5012     {
5013       coding->detector = detect_coding_emacs_mule;
5014       coding->decoder = decode_coding_emacs_mule;
5015       coding->encoder = encode_coding_emacs_mule;
5016       coding->common_flags
5017         |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
5018       if (! NILP (AREF (attrs, coding_attr_emacs_mule_full))
5019           && ! EQ (CODING_ATTR_CHARSET_LIST (attrs), Vemacs_mule_charset_list))
5020         {
5021           Lisp_Object tail, safe_charsets;
5022           int max_charset_id = 0;
5023
5024           for (tail = Vemacs_mule_charset_list; CONSP (tail);
5025                tail = XCDR (tail))
5026             if (max_charset_id < XFASTINT (XCAR (tail)))
5027               max_charset_id = XFASTINT (XCAR (tail));
5028           safe_charsets = Fmake_string (make_number (max_charset_id + 1),
5029                                         make_number (255));
5030           for (tail = Vemacs_mule_charset_list; CONSP (tail);
5031                tail = XCDR (tail))
5032             SSET (safe_charsets, XFASTINT (XCAR (tail)), 0);
5033           coding->max_charset_id = max_charset_id;
5034           coding->safe_charsets = (char *) SDATA (safe_charsets);
5035         }
5036     }
5037   else if (EQ (coding_type, Qshift_jis))
5038     {
5039       coding->detector = detect_coding_sjis;
5040       coding->decoder = decode_coding_sjis;
5041       coding->encoder = encode_coding_sjis;
5042       coding->common_flags
5043         |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
5044     }
5045   else if (EQ (coding_type, Qbig5))
5046     {
5047       coding->detector = detect_coding_big5;
5048       coding->decoder = decode_coding_big5;
5049       coding->encoder = encode_coding_big5;
5050       coding->common_flags
5051         |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
5052     }
5053   else                          /* EQ (coding_type, Qraw_text) */
5054     {
5055       coding->detector = NULL;
5056       coding->decoder = decode_coding_raw_text;
5057       coding->encoder = encode_coding_raw_text;
5058     }
5059
5060   return;
5061 }
5062
5063 /* Return raw-text or one of its subsidiaries that has the same
5064    eol_type as CODING-SYSTEM.  */
5065
5066 Lisp_Object
5067 raw_text_coding_system (coding_system)
5068      Lisp_Object coding_system;
5069 {
5070   Lisp_Object spec, attrs;
5071   Lisp_Object eol_type, raw_text_eol_type;
5072
5073   if (NILP (coding_system))
5074     return Qraw_text;
5075   spec = CODING_SYSTEM_SPEC (coding_system);
5076   attrs = AREF (spec, 0);
5077
5078   if (EQ (CODING_ATTR_TYPE (attrs), Qraw_text))
5079     return coding_system;
5080
5081   eol_type = AREF (spec, 2);
5082   if (VECTORP (eol_type))
5083     return Qraw_text;
5084   spec = CODING_SYSTEM_SPEC (Qraw_text);
5085   raw_text_eol_type = AREF (spec, 2);
5086   return (EQ (eol_type, Qunix) ? AREF (raw_text_eol_type, 0)
5087           : EQ (eol_type, Qdos) ? AREF (raw_text_eol_type, 1)
5088           : AREF (raw_text_eol_type, 2));
5089 }
5090
5091
5092 /* If CODING_SYSTEM doesn't specify end-of-line format but PARENT
5093    does, return one of the subsidiary that has the same eol-spec as
5094    PARENT.  Otherwise, return CODING_SYSTEM.  */
5095
5096 Lisp_Object
5097 coding_inherit_eol_type (coding_system, parent)
5098      Lisp_Object coding_system, parent;
5099 {
5100   Lisp_Object spec, eol_type;
5101
5102   if (NILP (coding_system))
5103     coding_system = Qraw_text;
5104   spec = CODING_SYSTEM_SPEC (coding_system);
5105   eol_type = AREF (spec, 2);
5106   if (VECTORP (eol_type)
5107       && ! NILP (parent))
5108     {
5109       Lisp_Object parent_spec;
5110       Lisp_Object parent_eol_type;
5111
5112       parent_spec
5113         = CODING_SYSTEM_SPEC (buffer_defaults.buffer_file_coding_system);
5114       parent_eol_type = AREF (parent_spec, 2);
5115       if (EQ (parent_eol_type, Qunix))
5116         coding_system = AREF (eol_type, 0);
5117       else if (EQ (parent_eol_type, Qdos))
5118         coding_system = AREF (eol_type, 1);
5119       else if (EQ (parent_eol_type, Qmac))
5120         coding_system = AREF (eol_type, 2);
5121     }
5122   return coding_system;
5123 }
5124
5125 /* Emacs has a mechanism to automatically detect a coding system if it
5126    is one of Emacs' internal format, ISO2022, SJIS, and BIG5.  But,
5127    it's impossible to distinguish some coding systems accurately
5128    because they use the same range of codes.  So, at first, coding
5129    systems are categorized into 7, those are:
5130
5131    o coding-category-emacs-mule
5132
5133         The category for a coding system which has the same code range
5134         as Emacs' internal format.  Assigned the coding-system (Lisp
5135         symbol) `emacs-mule' by default.
5136
5137    o coding-category-sjis
5138
5139         The category for a coding system which has the same code range
5140         as SJIS.  Assigned the coding-system (Lisp
5141         symbol) `japanese-shift-jis' by default.
5142
5143    o coding-category-iso-7
5144
5145         The category for a coding system which has the same code range
5146         as ISO2022 of 7-bit environment.  This doesn't use any locking
5147         shift and single shift functions.  This can encode/decode all
5148         charsets.  Assigned the coding-system (Lisp symbol)
5149         `iso-2022-7bit' by default.
5150
5151    o coding-category-iso-7-tight
5152
5153         Same as coding-category-iso-7 except that this can
5154         encode/decode only the specified charsets.
5155
5156    o coding-category-iso-8-1
5157
5158         The category for a coding system which has the same code range
5159         as ISO2022 of 8-bit environment and graphic plane 1 used only
5160         for DIMENSION1 charset.  This doesn't use any locking shift
5161         and single shift functions.  Assigned the coding-system (Lisp
5162         symbol) `iso-latin-1' by default.
5163
5164    o coding-category-iso-8-2
5165
5166         The category for a coding system which has the same code range
5167         as ISO2022 of 8-bit environment and graphic plane 1 used only
5168         for DIMENSION2 charset.  This doesn't use any locking shift
5169         and single shift functions.  Assigned the coding-system (Lisp
5170         symbol) `japanese-iso-8bit' by default.
5171
5172    o coding-category-iso-7-else
5173
5174         The category for a coding system which has the same code range
5175         as ISO2022 of 7-bit environemnt but uses locking shift or
5176         single shift functions.  Assigned the coding-system (Lisp
5177         symbol) `iso-2022-7bit-lock' by default.
5178
5179    o coding-category-iso-8-else
5180
5181         The category for a coding system which has the same code range
5182         as ISO2022 of 8-bit environemnt but uses locking shift or
5183         single shift functions.  Assigned the coding-system (Lisp
5184         symbol) `iso-2022-8bit-ss2' by default.
5185
5186    o coding-category-big5
5187
5188         The category for a coding system which has the same code range
5189         as BIG5.  Assigned the coding-system (Lisp symbol)
5190         `cn-big5' by default.
5191
5192    o coding-category-utf-8
5193
5194         The category for a coding system which has the same code range
5195         as UTF-8 (cf. RFC3629).  Assigned the coding-system (Lisp
5196         symbol) `utf-8' by default.
5197
5198    o coding-category-utf-16-be
5199
5200         The category for a coding system in which a text has an
5201         Unicode signature (cf. Unicode Standard) in the order of BIG
5202         endian at the head.  Assigned the coding-system (Lisp symbol)
5203         `utf-16-be' by default.
5204
5205    o coding-category-utf-16-le
5206
5207         The category for a coding system in which a text has an
5208         Unicode signature (cf. Unicode Standard) in the order of
5209         LITTLE endian at the head.  Assigned the coding-system (Lisp
5210         symbol) `utf-16-le' by default.
5211
5212    o coding-category-ccl
5213
5214         The category for a coding system of which encoder/decoder is
5215         written in CCL programs.  The default value is nil, i.e., no
5216         coding system is assigned.
5217
5218    o coding-category-binary
5219
5220         The category for a coding system not categorized in any of the
5221         above.  Assigned the coding-system (Lisp symbol)
5222         `no-conversion' by default.
5223
5224    Each of them is a Lisp symbol and the value is an actual
5225    `coding-system's (this is also a Lisp symbol) assigned by a user.
5226    What Emacs does actually is to detect a category of coding system.
5227    Then, it uses a `coding-system' assigned to it.  If Emacs can't
5228    decide only one possible category, it selects a category of the
5229    highest priority.  Priorities of categories are also specified by a
5230    user in a Lisp variable `coding-category-list'.
5231
5232 */
5233
5234 #define EOL_SEEN_NONE   0
5235 #define EOL_SEEN_LF     1
5236 #define EOL_SEEN_CR     2
5237 #define EOL_SEEN_CRLF   4
5238
5239 /* Detect how end-of-line of a text of length SRC_BYTES pointed by
5240    SOURCE is encoded.  If CATEGORY is one of
5241    coding_category_utf_16_XXXX, assume that CR and LF are encoded by
5242    two-byte, else they are encoded by one-byte.
5243
5244    Return one of EOL_SEEN_XXX.  */
5245
5246 #define MAX_EOL_CHECK_COUNT 3
5247
5248 static int
5249 detect_eol (source, src_bytes, category)
5250      const unsigned char *source;
5251      EMACS_INT src_bytes;
5252      enum coding_category category;
5253 {
5254   const unsigned char *src = source, *src_end = src + src_bytes;
5255   unsigned char c;
5256   int total  = 0;
5257   int eol_seen = EOL_SEEN_NONE;
5258
5259   if ((1 << category) & CATEGORY_MASK_UTF_16)
5260     {
5261       int msb, lsb;
5262
5263       msb = category == (coding_category_utf_16_le
5264                          | coding_category_utf_16_le_nosig);
5265       lsb = 1 - msb;
5266
5267       while (src + 1 < src_end)
5268         {
5269           c = src[lsb];
5270           if (src[msb] == 0 && (c == '\n' || c == '\r'))
5271             {
5272               int this_eol;
5273
5274               if (c == '\n')
5275                 this_eol = EOL_SEEN_LF;
5276               else if (src + 3 >= src_end
5277                        || src[msb + 2] != 0
5278                        || src[lsb + 2] != '\n')
5279                 this_eol = EOL_SEEN_CR;
5280               else
5281                 this_eol = EOL_SEEN_CRLF;
5282
5283               if (eol_seen == EOL_SEEN_NONE)
5284                 /* This is the first end-of-line.  */
5285                 eol_seen = this_eol;
5286               else if (eol_seen != this_eol)
5287                 {
5288                   /* The found type is different from what found before.  */
5289                   eol_seen = EOL_SEEN_LF;
5290                   break;
5291                 }
5292               if (++total == MAX_EOL_CHECK_COUNT)
5293                 break;
5294             }
5295           src += 2;
5296         }
5297     }
5298   else
5299     {
5300       while (src < src_end)
5301         {
5302           c = *src++;
5303           if (c == '\n' || c == '\r')
5304             {
5305               int this_eol;
5306
5307               if (c == '\n')
5308                 this_eol = EOL_SEEN_LF;
5309               else if (src >= src_end || *src != '\n')
5310                 this_eol = EOL_SEEN_CR;
5311               else
5312                 this_eol = EOL_SEEN_CRLF, src++;
5313
5314               if (eol_seen == EOL_SEEN_NONE)
5315                 /* This is the first end-of-line.  */
5316                 eol_seen = this_eol;
5317               else if (eol_seen != this_eol)
5318                 {
5319                   /* The found type is different from what found before.  */
5320                   eol_seen = EOL_SEEN_LF;
5321                   break;
5322                 }
5323               if (++total == MAX_EOL_CHECK_COUNT)
5324                 break;
5325             }
5326         }
5327     }
5328   return eol_seen;
5329 }
5330
5331
5332 static Lisp_Object
5333 adjust_coding_eol_type (coding, eol_seen)
5334      struct coding_system *coding;
5335      int eol_seen;
5336 {
5337   Lisp_Object eol_type;
5338
5339   eol_type = CODING_ID_EOL_TYPE (coding->id);
5340   if (eol_seen & EOL_SEEN_LF)
5341     {
5342       coding->id = CODING_SYSTEM_ID (AREF (eol_type, 0));
5343       eol_type = Qunix;
5344     }
5345   else if (eol_seen & EOL_SEEN_CRLF)
5346     {
5347       coding->id = CODING_SYSTEM_ID (AREF (eol_type, 1));
5348       eol_type = Qdos;
5349     }
5350   else if (eol_seen & EOL_SEEN_CR)
5351     {
5352       coding->id = CODING_SYSTEM_ID (AREF (eol_type, 2));
5353       eol_type = Qmac;
5354     }
5355   return eol_type;
5356 }
5357
5358 /* Detect how a text specified in CODING is encoded.  If a coding
5359    system is detected, update fields of CODING by the detected coding
5360    system.  */
5361
5362 void
5363 detect_coding (coding)
5364      struct coding_system *coding;
5365 {
5366   const unsigned char *src, *src_end;
5367   Lisp_Object attrs, coding_type;
5368
5369   coding->consumed = coding->consumed_char = 0;
5370   coding->produced = coding->produced_char = 0;
5371   coding_set_source (coding);
5372
5373   src_end = coding->source + coding->src_bytes;
5374
5375   /* If we have not yet decided the text encoding type, detect it
5376      now.  */
5377   if (EQ (CODING_ATTR_TYPE (CODING_ID_ATTRS (coding->id)), Qundecided))
5378     {
5379       int c, i;
5380
5381       for (i = 0, src = coding->source; src < src_end; i++, src++)
5382         {
5383           c = *src;
5384           if (c & 0x80 || (c < 0x20 && (c == ISO_CODE_ESC
5385                                         || c == ISO_CODE_SI
5386                                         || c == ISO_CODE_SO)))
5387             break;
5388         }
5389       coding->head_ascii = src - (coding->source + coding->consumed);
5390
5391       if (coding->head_ascii < coding->src_bytes)
5392         {
5393           struct coding_detection_info detect_info;
5394           enum coding_category category;
5395           struct coding_system *this;
5396
5397           detect_info.checked = detect_info.found = detect_info.rejected = 0;
5398           for (i = 0; i < coding_category_raw_text; i++)
5399             {
5400               category = coding_priorities[i];
5401               this = coding_categories + category;
5402               if (this->id < 0)
5403                 {
5404                   /* No coding system of this category is defined.  */
5405                   detect_info.rejected |= (1 << category);
5406                 }
5407               else if (category >= coding_category_raw_text)
5408                 continue;
5409               else if (detect_info.checked & (1 << category))
5410                 {
5411                   if (detect_info.found & (1 << category))
5412                     break;
5413                 }
5414               else if ((*(this->detector)) (coding, &detect_info)
5415                        && detect_info.found & (1 << category))
5416                 {
5417                   if (category == coding_category_utf_16_auto)
5418                     {
5419                       if (detect_info.found & CATEGORY_MASK_UTF_16_LE)
5420                         category = coding_category_utf_16_le;
5421                       else
5422                         category = coding_category_utf_16_be;
5423                     }
5424                   break;
5425                 }
5426             }
5427           if (i < coding_category_raw_text)
5428             setup_coding_system (CODING_ID_NAME (this->id), coding);
5429           else if (detect_info.rejected == CATEGORY_MASK_ANY)
5430             setup_coding_system (Qraw_text, coding);
5431           else if (detect_info.rejected)
5432             for (i = 0; i < coding_category_raw_text; i++)
5433               if (! (detect_info.rejected & (1 << coding_priorities[i])))
5434                 {
5435                   this = coding_categories + coding_priorities[i];
5436                   setup_coding_system (CODING_ID_NAME (this->id), coding);
5437                   break;
5438                 }
5439         }
5440     }
5441   else if (XINT (CODING_ATTR_CATEGORY (CODING_ID_ATTRS (coding->id)))
5442            == coding_category_utf_16_auto)
5443     {
5444       Lisp_Object coding_systems;
5445       struct coding_detection_info detect_info;
5446
5447       coding_systems
5448         = AREF (CODING_ID_ATTRS (coding->id), coding_attr_utf_16_bom);
5449       detect_info.found = detect_info.rejected = 0;
5450       if (CONSP (coding_systems)
5451           && detect_coding_utf_16 (coding, &detect_info))
5452         {
5453           if (detect_info.found & CATEGORY_MASK_UTF_16_LE)
5454             setup_coding_system (XCAR (coding_systems), coding);
5455           else if (detect_info.found & CATEGORY_MASK_UTF_16_BE)
5456             setup_coding_system (XCDR (coding_systems), coding);
5457         }
5458     }
5459 }
5460
5461
5462 static void
5463 decode_eol (coding)
5464      struct coding_system *coding;
5465 {
5466   Lisp_Object eol_type;
5467   unsigned char *p, *pbeg, *pend;
5468
5469   eol_type = CODING_ID_EOL_TYPE (coding->id);
5470   if (EQ (eol_type, Qunix))
5471     return;
5472
5473   if (NILP (coding->dst_object))
5474     pbeg = coding->destination;
5475   else
5476     pbeg = BYTE_POS_ADDR (coding->dst_pos_byte);
5477   pend = pbeg + coding->produced;
5478
5479   if (VECTORP (eol_type))
5480     {
5481       int eol_seen = EOL_SEEN_NONE;
5482
5483       for (p = pbeg; p < pend; p++)
5484         {
5485           if (*p == '\n')
5486             eol_seen |= EOL_SEEN_LF;
5487           else if (*p == '\r')
5488             {
5489               if (p + 1 < pend && *(p + 1) == '\n')
5490                 {
5491                   eol_seen |= EOL_SEEN_CRLF;
5492                   p++;
5493                 }
5494               else
5495                 eol_seen |= EOL_SEEN_CR;
5496             }
5497         }
5498       if (eol_seen != EOL_SEEN_NONE
5499           && eol_seen != EOL_SEEN_LF
5500           && eol_seen != EOL_SEEN_CRLF
5501           && eol_seen != EOL_SEEN_CR)
5502         eol_seen = EOL_SEEN_LF;
5503       if (eol_seen != EOL_SEEN_NONE)
5504         eol_type = adjust_coding_eol_type (coding, eol_seen);
5505     }
5506
5507   if (EQ (eol_type, Qmac))
5508     {
5509       for (p = pbeg; p < pend; p++)
5510         if (*p == '\r')
5511           *p = '\n';
5512     }
5513   else if (EQ (eol_type, Qdos))
5514     {
5515       int n = 0;
5516
5517       if (NILP (coding->dst_object))
5518         {
5519           for (p = pend - 2; p >= pbeg; p--)
5520             if (*p == '\r')
5521               {
5522                 safe_bcopy ((char *) (p + 1), (char *) p, pend-- - p - 1);
5523                 n++;
5524               }
5525         }
5526       else
5527         {
5528           for (p = pend - 2; p >= pbeg; p--)
5529             if (*p == '\r')
5530               {
5531                 int pos_byte = coding->dst_pos_byte + (p - pbeg);
5532                 int pos = BYTE_TO_CHAR (pos_byte);
5533
5534                 del_range_2 (pos, pos_byte, pos + 1, pos_byte + 1, 0);
5535                 n++;
5536               }
5537         }
5538       coding->produced -= n;
5539       coding->produced_char -= n;
5540     }
5541 }
5542
5543
5544 /* Return a translation table (or list of them) from coding system
5545    attribute vector ATTRS for encoding (ENCODEP is nonzero) or
5546    decoding (ENCODEP is zero). */
5547
5548 static Lisp_Object
5549 get_translation_table (attrs, encodep, max_lookup)
5550      Lisp_Object attrs;
5551      int encodep, *max_lookup;
5552 {
5553   Lisp_Object standard, translation_table;
5554   Lisp_Object val;
5555
5556   if (encodep)
5557     translation_table = CODING_ATTR_ENCODE_TBL (attrs),
5558       standard = Vstandard_translation_table_for_encode;
5559   else
5560     translation_table = CODING_ATTR_DECODE_TBL (attrs),
5561       standard = Vstandard_translation_table_for_decode;
5562   if (NILP (translation_table))
5563     translation_table = standard;
5564   else
5565     {
5566       if (SYMBOLP (translation_table))
5567         translation_table = Fget (translation_table, Qtranslation_table);
5568       else if (CONSP (translation_table))
5569         {
5570           translation_table = Fcopy_sequence (translation_table);
5571           for (val = translation_table; CONSP (val); val = XCDR (val))
5572             if (SYMBOLP (XCAR (val)))
5573               XSETCAR (val, Fget (XCAR (val), Qtranslation_table));
5574         }
5575       if (CHAR_TABLE_P (standard))
5576         {
5577           if (CONSP (translation_table))
5578             translation_table = nconc2 (translation_table,
5579                                         Fcons (standard, Qnil));
5580           else
5581             translation_table = Fcons (translation_table,
5582                                        Fcons (standard, Qnil));
5583         }
5584     }
5585
5586   if (max_lookup)
5587     {
5588       *max_lookup = 1;
5589       if (CHAR_TABLE_P (translation_table)
5590           && CHAR_TABLE_EXTRA_SLOTS (XCHAR_TABLE (translation_table)) > 1)
5591         {
5592           val = XCHAR_TABLE (translation_table)->extras[1];
5593           if (NATNUMP (val) && *max_lookup < XFASTINT (val))
5594             *max_lookup = XFASTINT (val);
5595         }
5596       else if (CONSP (translation_table))
5597         {
5598           Lisp_Object tail, val;
5599
5600           for (tail = translation_table; CONSP (tail); tail = XCDR (tail))
5601             if (CHAR_TABLE_P (XCAR (tail))
5602                 && CHAR_TABLE_EXTRA_SLOTS (XCHAR_TABLE (XCAR (tail))) > 1)
5603               {
5604                 val = XCHAR_TABLE (XCAR (tail))->extras[1];
5605                 if (NATNUMP (val) && *max_lookup < XFASTINT (val))
5606                   *max_lookup = XFASTINT (val);
5607               }
5608         }
5609     }
5610   return translation_table;
5611 }
5612
5613 #define LOOKUP_TRANSLATION_TABLE(table, c, trans)               \
5614   do {                                                          \
5615     trans = Qnil;                                               \
5616     if (CHAR_TABLE_P (table))                                   \
5617       {                                                         \
5618         trans = CHAR_TABLE_REF (table, c);                      \
5619         if (CHARACTERP (trans))                                 \
5620           c = XFASTINT (trans), trans = Qnil;                   \
5621       }                                                         \
5622     else if (CONSP (table))                                     \
5623       {                                                         \
5624         Lisp_Object tail;                                       \
5625                                                                 \
5626         for (tail = table; CONSP (tail); tail = XCDR (tail))    \
5627           if (CHAR_TABLE_P (XCAR (tail)))                       \
5628             {                                                   \
5629               trans = CHAR_TABLE_REF (XCAR (tail), c);          \
5630               if (CHARACTERP (trans))                           \
5631                 c = XFASTINT (trans), trans = Qnil;             \
5632               else if (! NILP (trans))                          \
5633                 break;                                          \
5634             }                                                   \
5635       }                                                         \
5636   } while (0)
5637
5638
5639 static Lisp_Object
5640 get_translation (val, buf, buf_end, last_block, from_nchars, to_nchars)
5641      Lisp_Object val;
5642      int *buf, *buf_end;
5643      int last_block;
5644      int *from_nchars, *to_nchars;
5645 {
5646   /* VAL is TO or (([FROM-CHAR ...] .  TO) ...) where TO is TO-CHAR or
5647      [TO-CHAR ...].  */
5648   if (CONSP (val))
5649     {
5650       Lisp_Object from, tail;
5651       int i, len;
5652
5653       for (tail = val; CONSP (tail); tail = XCDR (tail))
5654         {
5655           val = XCAR (tail);
5656           from = XCAR (val);
5657           len = ASIZE (from);
5658           for (i = 0; i < len; i++)
5659             {
5660               if (buf + i == buf_end)
5661                 {
5662                   if (! last_block)
5663                     return Qt;
5664                   break;
5665                 }
5666               if (XINT (AREF (from, i)) != buf[i])
5667                 break;
5668             }
5669           if (i == len)
5670             {
5671               val = XCDR (val);
5672               *from_nchars = len;
5673               break;
5674             }
5675         }
5676       if (! CONSP (tail))
5677         return Qnil;
5678     }
5679   if (VECTORP (val))
5680     *buf = XINT (AREF (val, 0)), *to_nchars = ASIZE (val);
5681   else
5682     *buf = XINT (val);
5683   return val;
5684 }
5685
5686
5687 static int
5688 produce_chars (coding, translation_table, last_block)
5689      struct coding_system *coding;
5690      Lisp_Object translation_table;
5691      int last_block;
5692 {
5693   unsigned char *dst = coding->destination + coding->produced;
5694   unsigned char *dst_end = coding->destination + coding->dst_bytes;
5695   int produced;
5696   int produced_chars = 0;
5697   int carryover = 0;
5698
5699   if (! coding->chars_at_source)
5700     {
5701       /* Characters are in coding->charbuf.  */
5702       int *buf = coding->charbuf;
5703       int *buf_end = buf + coding->charbuf_used;
5704
5705       if (BUFFERP (coding->src_object)
5706           && EQ (coding->src_object, coding->dst_object))
5707         dst_end = ((unsigned char *) coding->source) + coding->consumed;
5708
5709       while (buf < buf_end)
5710         {
5711           int c = *buf, i;
5712
5713           if (c >= 0)
5714             {
5715               int from_nchars = 1, to_nchars = 1;
5716               Lisp_Object trans = Qnil;
5717
5718               LOOKUP_TRANSLATION_TABLE (translation_table, c, trans);
5719               if (! NILP (trans))
5720                 {
5721                   trans = get_translation (trans, buf, buf_end, last_block,
5722                                            &from_nchars, &to_nchars);
5723                   if (EQ (trans, Qt))
5724                     break;
5725                   c = *buf;
5726                 }
5727
5728               if (dst + MAX_MULTIBYTE_LENGTH * to_nchars > dst_end)
5729                 {
5730                   dst = alloc_destination (coding,
5731                                            buf_end - buf
5732                                            + MAX_MULTIBYTE_LENGTH * to_nchars,
5733                                            dst);
5734                   dst_end = coding->destination + coding->dst_bytes;
5735                 }
5736
5737               for (i = 0; i < to_nchars; i++)
5738                 {
5739                   if (i > 0)
5740                     c = XINT (AREF (trans, i));
5741                   if (coding->dst_multibyte
5742                       || ! CHAR_BYTE8_P (c))
5743                     CHAR_STRING_ADVANCE (c, dst);
5744                   else
5745                     *dst++ = CHAR_TO_BYTE8 (c);
5746                 }
5747               produced_chars += to_nchars;
5748               *buf++ = to_nchars;
5749               while (--from_nchars > 0)
5750                 *buf++ = 0;
5751             }
5752           else
5753             /* This is an annotation datum.  (-C) is the length.  */
5754             buf += -c;
5755         }
5756       carryover = buf_end - buf;
5757     }
5758   else
5759     {
5760       const unsigned char *src = coding->source;
5761       const unsigned char *src_end = src + coding->src_bytes;
5762       Lisp_Object eol_type;
5763
5764       eol_type = CODING_ID_EOL_TYPE (coding->id);
5765
5766       if (coding->src_multibyte != coding->dst_multibyte)
5767         {
5768           if (coding->src_multibyte)
5769             {
5770               int multibytep = 1;
5771               int consumed_chars;
5772
5773               while (1)
5774                 {
5775                   const unsigned char *src_base = src;
5776                   int c;
5777
5778                   ONE_MORE_BYTE (c);
5779                   if (c == '\r')
5780                     {
5781                       if (EQ (eol_type, Qdos))
5782                         {
5783                           if (src == src_end)
5784                             {
5785                               record_conversion_result
5786                                 (coding, CODING_RESULT_INSUFFICIENT_SRC);
5787                               goto no_more_source;
5788                             }
5789                           if (*src == '\n')
5790                             c = *src++;
5791                         }
5792                       else if (EQ (eol_type, Qmac))
5793                         c = '\n';
5794                     }
5795                   if (dst == dst_end)
5796                     {
5797                       coding->consumed = src - coding->source;
5798
5799                     if (EQ (coding->src_object, coding->dst_object))
5800                       dst_end = (unsigned char *) src;
5801                     if (dst == dst_end)
5802                       {
5803                         dst = alloc_destination (coding, src_end - src + 1,
5804                                                  dst);
5805                         dst_end = coding->destination + coding->dst_bytes;
5806                         coding_set_source (coding);
5807                         src = coding->source + coding->consumed;
5808                         src_end = coding->source + coding->src_bytes;
5809                       }
5810                     }
5811                   *dst++ = c;
5812                   produced_chars++;
5813                 }
5814             no_more_source:
5815               ;
5816             }
5817           else
5818             while (src < src_end)
5819               {
5820                 int multibytep = 1;
5821                 int c = *src++;
5822
5823                 if (c == '\r')
5824                   {
5825                     if (EQ (eol_type, Qdos))
5826                       {
5827                         if (src < src_end
5828                             && *src == '\n')
5829                           c = *src++;
5830                       }
5831                     else if (EQ (eol_type, Qmac))
5832                       c = '\n';
5833                   }
5834                 if (dst >= dst_end - 1)
5835                   {
5836                     coding->consumed = src - coding->source;
5837
5838                     if (EQ (coding->src_object, coding->dst_object))
5839                       dst_end = (unsigned char *) src;
5840                     if (dst >= dst_end - 1)
5841                       {
5842                         dst = alloc_destination (coding, src_end - src + 2,
5843                                                  dst);
5844                         dst_end = coding->destination + coding->dst_bytes;
5845                         coding_set_source (coding);
5846                         src = coding->source + coding->consumed;
5847                         src_end = coding->source + coding->src_bytes;
5848                       }
5849                   }
5850                 EMIT_ONE_BYTE (c);
5851               }
5852         }
5853       else
5854         {
5855           if (!EQ (coding->src_object, coding->dst_object))
5856             {
5857               int require = coding->src_bytes - coding->dst_bytes;
5858
5859               if (require > 0)
5860                 {
5861                   EMACS_INT offset = src - coding->source;
5862
5863                   dst = alloc_destination (coding, require, dst);
5864                   coding_set_source (coding);
5865                   src = coding->source + offset;
5866                   src_end = coding->source + coding->src_bytes;
5867                 }
5868             }
5869           produced_chars = coding->src_chars;
5870           while (src < src_end)
5871             {
5872               int c = *src++;
5873
5874               if (c == '\r')
5875                 {
5876                   if (EQ (eol_type, Qdos))
5877                     {
5878                       if (src < src_end
5879                           && *src == '\n')
5880                         c = *src++;
5881                       produced_chars--;
5882                     }
5883                   else if (EQ (eol_type, Qmac))
5884                     c = '\n';
5885                 }
5886               *dst++ = c;
5887             }
5888         }
5889       coding->consumed = coding->src_bytes;
5890       coding->consumed_char = coding->src_chars;
5891     }
5892
5893   produced = dst - (coding->destination + coding->produced);
5894   if (BUFFERP (coding->dst_object))
5895     insert_from_gap (produced_chars, produced);
5896   coding->produced += produced;
5897   coding->produced_char += produced_chars;
5898   return carryover;
5899 }
5900
5901 /* Compose text in CODING->object according to the annotation data at
5902    CHARBUF.  CHARBUF is an array:
5903      [ -LENGTH ANNOTATION_MASK FROM TO METHOD COMP_LEN [ COMPONENTS... ] ]
5904  */
5905
5906 static INLINE void
5907 produce_composition (coding, charbuf, pos)
5908      struct coding_system *coding;
5909      int *charbuf;
5910      EMACS_INT pos;
5911 {
5912   int len;
5913   EMACS_INT to;
5914   enum composition_method method;
5915   Lisp_Object components;
5916
5917   len = -charbuf[0];
5918   to = pos + charbuf[2];
5919   method = (enum composition_method) (charbuf[3]);
5920
5921   if (method == COMPOSITION_RELATIVE)
5922     components = Qnil;
5923   else
5924     {
5925       Lisp_Object args[MAX_COMPOSITION_COMPONENTS * 2 - 1];
5926       int i;
5927
5928       len -= 4;
5929       charbuf += 4;
5930       for (i = 0; i < len; i++)
5931         args[i] = make_number (charbuf[i]);
5932       components = (method == COMPOSITION_WITH_ALTCHARS
5933                     ? Fstring (len, args) : Fvector (len, args));
5934     }
5935   compose_text (pos, to, components, Qnil, coding->dst_object);
5936 }
5937
5938
5939 /* Put `charset' property on text in CODING->object according to
5940    the annotation data at CHARBUF.  CHARBUF is an array:
5941      [ -LENGTH ANNOTATION_MASK NCHARS CHARSET-ID ]
5942  */
5943
5944 static INLINE void
5945 produce_charset (coding, charbuf, pos)
5946      struct coding_system *coding;
5947      int *charbuf;
5948      EMACS_INT pos;
5949 {
5950   EMACS_INT from = pos - charbuf[2];
5951   struct charset *charset = CHARSET_FROM_ID (charbuf[3]);
5952
5953   Fput_text_property (make_number (from), make_number (pos),
5954                       Qcharset, CHARSET_NAME (charset),
5955                       coding->dst_object);
5956 }
5957
5958
5959 #define CHARBUF_SIZE 0x4000
5960
5961 #define ALLOC_CONVERSION_WORK_AREA(coding)                              \
5962   do {                                                                  \
5963     int size = CHARBUF_SIZE;;                                           \
5964                                                                         \
5965     coding->charbuf = NULL;                                             \
5966     while (size > 1024)                                                 \
5967       {                                                                 \
5968         coding->charbuf = (int *) alloca (sizeof (int) * size);         \
5969         if (coding->charbuf)                                            \
5970           break;                                                        \
5971         size >>= 1;                                                     \
5972       }                                                                 \
5973     if (! coding->charbuf)                                              \
5974       {                                                                 \
5975         record_conversion_result (coding, CODING_RESULT_INSUFFICIENT_MEM); \
5976         return coding->result;                                          \
5977       }                                                                 \
5978     coding->charbuf_size = size;                                        \
5979   } while (0)
5980
5981
5982 static void
5983 produce_annotation (coding, pos)
5984      struct coding_system *coding;
5985      EMACS_INT pos;
5986 {
5987   int *charbuf = coding->charbuf;
5988   int *charbuf_end = charbuf + coding->charbuf_used;
5989
5990   if (NILP (coding->dst_object))
5991     return;
5992
5993   while (charbuf < charbuf_end)
5994     {
5995       if (*charbuf >= 0)
5996         pos += *charbuf++;
5997       else
5998         {
5999           int len = -*charbuf;
6000           switch (charbuf[1])
6001             {
6002             case CODING_ANNOTATE_COMPOSITION_MASK:
6003               produce_composition (coding, charbuf, pos);
6004               break;
6005             case CODING_ANNOTATE_CHARSET_MASK:
6006               produce_charset (coding, charbuf, pos);
6007               break;
6008             default:
6009               abort ();
6010             }
6011           charbuf += len;
6012         }
6013     }
6014 }
6015
6016 /* Decode the data at CODING->src_object into CODING->dst_object.
6017    CODING->src_object is a buffer, a string, or nil.
6018    CODING->dst_object is a buffer.
6019
6020    If CODING->src_object is a buffer, it must be the current buffer.
6021    In this case, if CODING->src_pos is positive, it is a position of
6022    the source text in the buffer, otherwise, the source text is in the
6023    gap area of the buffer, and CODING->src_pos specifies the offset of
6024    the text from GPT (which must be the same as PT).  If this is the
6025    same buffer as CODING->dst_object, CODING->src_pos must be
6026    negative.
6027
6028    If CODING->src_object is a string, CODING->src_pos in an index to
6029    that string.
6030
6031    If CODING->src_object is nil, CODING->source must already point to
6032    the non-relocatable memory area.  In this case, CODING->src_pos is
6033    an offset from CODING->source.
6034
6035    The decoded data is inserted at the current point of the buffer
6036    CODING->dst_object.
6037 */
6038
6039 static int
6040 decode_coding (coding)
6041      struct coding_system *coding;
6042 {
6043   Lisp_Object attrs;
6044   Lisp_Object undo_list;
6045   Lisp_Object translation_table;
6046   int carryover;
6047   int i;
6048
6049   if (BUFFERP (coding->src_object)
6050       && coding->src_pos > 0
6051       && coding->src_pos < GPT
6052       && coding->src_pos + coding->src_chars > GPT)
6053     move_gap_both (coding->src_pos, coding->src_pos_byte);
6054
6055   undo_list = Qt;
6056   if (BUFFERP (coding->dst_object))
6057     {
6058       if (current_buffer != XBUFFER (coding->dst_object))
6059         set_buffer_internal (XBUFFER (coding->dst_object));
6060       if (GPT != PT)
6061         move_gap_both (PT, PT_BYTE);
6062       undo_list = current_buffer->undo_list;
6063       current_buffer->undo_list = Qt;
6064     }
6065
6066   coding->consumed = coding->consumed_char = 0;
6067   coding->produced = coding->produced_char = 0;
6068   coding->chars_at_source = 0;
6069   record_conversion_result (coding, CODING_RESULT_SUCCESS);
6070   coding->errors = 0;
6071
6072   ALLOC_CONVERSION_WORK_AREA (coding);
6073
6074   attrs = CODING_ID_ATTRS (coding->id);
6075   translation_table = get_translation_table (attrs, 0, NULL);
6076
6077   carryover = 0;
6078   do
6079     {
6080       EMACS_INT pos = coding->dst_pos + coding->produced_char;
6081
6082       coding_set_source (coding);
6083       coding->annotated = 0;
6084       coding->charbuf_used = carryover;
6085       (*(coding->decoder)) (coding);
6086       coding_set_destination (coding);
6087       carryover = produce_chars (coding, translation_table, 0);
6088       if (coding->annotated)
6089         produce_annotation (coding, pos);
6090       for (i = 0; i < carryover; i++)
6091         coding->charbuf[i]
6092           = coding->charbuf[coding->charbuf_used - carryover + i];
6093     }
6094   while (coding->consumed < coding->src_bytes
6095          && ! coding->result);
6096
6097   if (carryover > 0)
6098     {
6099       coding_set_destination (coding);
6100       coding->charbuf_used = carryover;
6101       produce_chars (coding, translation_table, 1);
6102     }
6103
6104   coding->carryover_bytes = 0;
6105   if (coding->consumed < coding->src_bytes)
6106     {
6107       int nbytes = coding->src_bytes - coding->consumed;
6108       const unsigned char *src;
6109
6110       coding_set_source (coding);
6111       coding_set_destination (coding);
6112       src = coding->source + coding->consumed;
6113
6114       if (coding->mode & CODING_MODE_LAST_BLOCK)
6115         {
6116           /* Flush out unprocessed data as binary chars.  We are sure
6117              that the number of data is less than the size of
6118              coding->charbuf.  */
6119           coding->charbuf_used = 0;
6120           while (nbytes-- > 0)
6121             {
6122               int c = *src++;
6123
6124               coding->charbuf[coding->charbuf_used++] = (c & 0x80 ? - c : c);
6125             }
6126           produce_chars (coding, Qnil, 1);
6127         }
6128       else
6129         {
6130           /* Record unprocessed bytes in coding->carryover.  We are
6131              sure that the number of data is less than the size of
6132              coding->carryover.  */
6133           unsigned char *p = coding->carryover;
6134
6135           coding->carryover_bytes = nbytes;
6136           while (nbytes-- > 0)
6137             *p++ = *src++;
6138         }
6139       coding->consumed = coding->src_bytes;
6140     }
6141
6142   if (BUFFERP (coding->dst_object))
6143     {
6144       current_buffer->undo_list = undo_list;
6145       record_insert (coding->dst_pos, coding->produced_char);
6146     }
6147   if (! EQ (CODING_ID_EOL_TYPE (coding->id), Qunix))
6148     decode_eol (coding);
6149   return coding->result;
6150 }
6151
6152
6153 /* Extract an annotation datum from a composition starting at POS and
6154    ending before LIMIT of CODING->src_object (buffer or string), store
6155    the data in BUF, set *STOP to a starting position of the next
6156    composition (if any) or to LIMIT, and return the address of the
6157    next element of BUF.
6158
6159    If such an annotation is not found, set *STOP to a starting
6160    position of a composition after POS (if any) or to LIMIT, and
6161    return BUF.  */
6162
6163 static INLINE int *
6164 handle_composition_annotation (pos, limit, coding, buf, stop)
6165      EMACS_INT pos, limit;
6166      struct coding_system *coding;
6167      int *buf;
6168      EMACS_INT *stop;
6169 {
6170   EMACS_INT start, end;
6171   Lisp_Object prop;
6172
6173   if (! find_composition (pos, limit, &start, &end, &prop, coding->src_object)
6174       || end > limit)
6175     *stop = limit;
6176   else if (start > pos)
6177     *stop = start;
6178   else
6179     {
6180       if (start == pos)
6181         {
6182           /* We found a composition.  Store the corresponding
6183              annotation data in BUF.  */
6184           int *head = buf;
6185           enum composition_method method = COMPOSITION_METHOD (prop);
6186           int nchars = COMPOSITION_LENGTH (prop);
6187
6188           ADD_COMPOSITION_DATA (buf, nchars, method);
6189           if (method != COMPOSITION_RELATIVE)
6190             {
6191               Lisp_Object components;
6192               int len, i, i_byte;
6193
6194               components = COMPOSITION_COMPONENTS (prop);
6195               if (VECTORP (components))
6196                 {
6197                   len = XVECTOR (components)->size;
6198                   for (i = 0; i < len; i++)
6199                     *buf++ = XINT (AREF (components, i));
6200                 }
6201               else if (STRINGP (components))
6202                 {
6203                   len = SCHARS (components);
6204                   i = i_byte = 0;
6205                   while (i < len)
6206                     {
6207                       FETCH_STRING_CHAR_ADVANCE (*buf, components, i, i_byte);
6208                       buf++;
6209                     }
6210                 }
6211               else if (INTEGERP (components))
6212                 {
6213                   len = 1;
6214                   *buf++ = XINT (components);
6215                 }
6216               else if (CONSP (components))
6217                 {
6218                   for (len = 0; CONSP (components);
6219                        len++, components = XCDR (components))
6220                     *buf++ = XINT (XCAR (components));
6221                 }
6222               else
6223                 abort ();
6224               *head -= len;
6225             }
6226         }
6227
6228       if (find_composition (end, limit, &start, &end, &prop,
6229                             coding->src_object)
6230           && end <= limit)
6231         *stop = start;
6232       else
6233         *stop = limit;
6234     }
6235   return buf;
6236 }
6237
6238
6239 /* Extract an annotation datum from a text property `charset' at POS of
6240    CODING->src_object (buffer of string), store the data in BUF, set
6241    *STOP to the position where the value of `charset' property changes
6242    (limiting by LIMIT), and return the address of the next element of
6243    BUF.
6244
6245    If the property value is nil, set *STOP to the position where the
6246    property value is non-nil (limiting by LIMIT), and return BUF.  */
6247
6248 static INLINE int *
6249 handle_charset_annotation (pos, limit, coding, buf, stop)
6250      EMACS_INT pos, limit;
6251      struct coding_system *coding;
6252      int *buf;
6253      EMACS_INT *stop;
6254 {
6255   Lisp_Object val, next;
6256   int id;
6257
6258   val = Fget_text_property (make_number (pos), Qcharset, coding->src_object);
6259   if (! NILP (val) && CHARSETP (val))
6260     id = XINT (CHARSET_SYMBOL_ID (val));
6261   else
6262     id = -1;
6263   ADD_CHARSET_DATA (buf, 0, id);
6264   next = Fnext_single_property_change (make_number (pos), Qcharset,
6265                                        coding->src_object,
6266                                        make_number (limit));
6267   *stop = XINT (next);
6268   return buf;
6269 }
6270
6271
6272 static void
6273 consume_chars (coding, translation_table, max_lookup)
6274      struct coding_system *coding;
6275      Lisp_Object translation_table;
6276      int max_lookup;
6277 {
6278   int *buf = coding->charbuf;
6279   int *buf_end = coding->charbuf + coding->charbuf_size;
6280   const unsigned char *src = coding->source + coding->consumed;
6281   const unsigned char *src_end = coding->source + coding->src_bytes;
6282   EMACS_INT pos = coding->src_pos + coding->consumed_char;
6283   EMACS_INT end_pos = coding->src_pos + coding->src_chars;
6284   int multibytep = coding->src_multibyte;
6285   Lisp_Object eol_type;
6286   int c;
6287   EMACS_INT stop, stop_composition, stop_charset;
6288   int *lookup_buf = NULL;
6289
6290   if (! NILP (translation_table))
6291     lookup_buf = alloca (sizeof (int) * max_lookup);
6292
6293   eol_type = CODING_ID_EOL_TYPE (coding->id);
6294   if (VECTORP (eol_type))
6295     eol_type = Qunix;
6296
6297   /* Note: composition handling is not yet implemented.  */
6298   coding->common_flags &= ~CODING_ANNOTATE_COMPOSITION_MASK;
6299
6300   if (NILP (coding->src_object))
6301     stop = stop_composition = stop_charset = end_pos;
6302   else
6303     {
6304       if (coding->common_flags & CODING_ANNOTATE_COMPOSITION_MASK)
6305         stop = stop_composition = pos;
6306       else
6307         stop = stop_composition = end_pos;
6308       if (coding->common_flags & CODING_ANNOTATE_CHARSET_MASK)
6309         stop = stop_charset = pos;
6310       else
6311         stop_charset = end_pos;
6312     }
6313
6314   /* Compensate for CRLF and conversion.  */
6315   buf_end -= 1 + MAX_ANNOTATION_LENGTH;
6316   while (buf < buf_end)
6317     {
6318       Lisp_Object trans;
6319
6320       if (pos == stop)
6321         {
6322           if (pos == end_pos)
6323             break;
6324           if (pos == stop_composition)
6325             buf = handle_composition_annotation (pos, end_pos, coding,
6326                                                  buf, &stop_composition);
6327           if (pos == stop_charset)
6328             buf = handle_charset_annotation (pos, end_pos, coding,
6329                                              buf, &stop_charset);
6330           stop = (stop_composition < stop_charset
6331                   ? stop_composition : stop_charset);
6332         }
6333
6334       if (! multibytep)
6335         {
6336           EMACS_INT bytes;
6337
6338           if (! CODING_FOR_UNIBYTE (coding)
6339               && (bytes = MULTIBYTE_LENGTH (src, src_end)) > 0)
6340             c = STRING_CHAR_ADVANCE (src), pos += bytes;
6341           else
6342             c = *src++, pos++;
6343         }
6344       else
6345         c = STRING_CHAR_ADVANCE (src), pos++;
6346       if ((c == '\r') && (coding->mode & CODING_MODE_SELECTIVE_DISPLAY))
6347         c = '\n';
6348       if (! EQ (eol_type, Qunix))
6349         {
6350           if (c == '\n')
6351             {
6352               if (EQ (eol_type, Qdos))
6353                 *buf++ = '\r';
6354               else
6355                 c = '\r';
6356             }
6357         }
6358
6359       trans = Qnil;
6360       LOOKUP_TRANSLATION_TABLE (translation_table, c, trans);
6361       if (NILP (trans))
6362         *buf++ = c;
6363       else
6364         {
6365           int from_nchars = 1, to_nchars = 1;
6366           int *lookup_buf_end;
6367           const unsigned char *p = src;
6368           int i;
6369
6370           lookup_buf[0] = c;
6371           for (i = 1; i < max_lookup && p < src_end; i++)
6372             lookup_buf[i] = STRING_CHAR_ADVANCE (p);
6373           lookup_buf_end = lookup_buf + i;
6374           trans = get_translation (trans, lookup_buf, lookup_buf_end, 1,
6375                                    &from_nchars, &to_nchars);
6376           if (EQ (trans, Qt)
6377               || buf + to_nchars > buf_end)
6378             break;
6379           *buf++ = *lookup_buf;
6380           for (i = 1; i < to_nchars; i++)
6381             *buf++ = XINT (AREF (trans, i));
6382           for (i = 1; i < from_nchars; i++, pos++)
6383             src += MULTIBYTE_LENGTH_NO_CHECK (src);
6384         }
6385     }
6386
6387   coding->consumed = src - coding->source;
6388   coding->consumed_char = pos - coding->src_pos;
6389   coding->charbuf_used = buf - coding->charbuf;
6390   coding->chars_at_source = 0;
6391 }
6392
6393
6394 /* Encode the text at CODING->src_object into CODING->dst_object.
6395    CODING->src_object is a buffer or a string.
6396    CODING->dst_object is a buffer or nil.
6397
6398    If CODING->src_object is a buffer, it must be the current buffer.
6399    In this case, if CODING->src_pos is positive, it is a position of
6400    the source text in the buffer, otherwise. the source text is in the
6401    gap area of the buffer, and coding->src_pos specifies the offset of
6402    the text from GPT (which must be the same as PT).  If this is the
6403    same buffer as CODING->dst_object, CODING->src_pos must be
6404    negative and CODING should not have `pre-write-conversion'.
6405
6406    If CODING->src_object is a string, CODING should not have
6407    `pre-write-conversion'.
6408
6409    If CODING->dst_object is a buffer, the encoded data is inserted at
6410    the current point of that buffer.
6411
6412    If CODING->dst_object is nil, the encoded data is placed at the
6413    memory area specified by CODING->destination.  */
6414
6415 static int
6416 encode_coding (coding)
6417      struct coding_system *coding;
6418 {
6419   Lisp_Object attrs;
6420   Lisp_Object translation_table;
6421   int max_lookup;
6422
6423   attrs = CODING_ID_ATTRS (coding->id);
6424   translation_table = get_translation_table (attrs, 1, &max_lookup);
6425
6426   if (BUFFERP (coding->dst_object))
6427     {
6428       set_buffer_internal (XBUFFER (coding->dst_object));
6429       coding->dst_multibyte
6430         = ! NILP (current_buffer->enable_multibyte_characters);
6431     }
6432
6433   coding->consumed = coding->consumed_char = 0;
6434   coding->produced = coding->produced_char = 0;
6435   record_conversion_result (coding, CODING_RESULT_SUCCESS);
6436   coding->errors = 0;
6437
6438   ALLOC_CONVERSION_WORK_AREA (coding);
6439
6440   do {
6441     coding_set_source (coding);
6442     consume_chars (coding, translation_table, max_lookup);
6443     coding_set_destination (coding);
6444     (*(coding->encoder)) (coding);
6445   } while (coding->consumed_char < coding->src_chars);
6446
6447   if (BUFFERP (coding->dst_object))
6448     insert_from_gap (coding->produced_char, coding->produced);
6449
6450   return (coding->result);
6451 }
6452
6453
6454 /* Name (or base name) of work buffer for code conversion.  */
6455 static Lisp_Object Vcode_conversion_workbuf_name;
6456
6457 /* A working buffer used by the top level conversion.  Once it is
6458    created, it is never destroyed.  It has the name
6459    Vcode_conversion_workbuf_name.  The other working buffers are
6460    destroyed after the use is finished, and their names are modified
6461    versions of Vcode_conversion_workbuf_name.  */
6462 static Lisp_Object Vcode_conversion_reused_workbuf;
6463
6464 /* 1 iff Vcode_conversion_reused_workbuf is already in use.  */
6465 static int reused_workbuf_in_use;
6466
6467
6468 /* Return a working buffer of code convesion.  MULTIBYTE specifies the
6469    multibyteness of returning buffer.  */
6470
6471 static Lisp_Object
6472 make_conversion_work_buffer (multibyte)
6473      int multibyte;
6474 {
6475   Lisp_Object name, workbuf;
6476   struct buffer *current;
6477
6478   if (reused_workbuf_in_use++)
6479     {
6480       name = Fgenerate_new_buffer_name (Vcode_conversion_workbuf_name, Qnil);
6481       workbuf = Fget_buffer_create (name);
6482     }
6483   else
6484     {
6485       name = Vcode_conversion_workbuf_name;
6486       workbuf = Fget_buffer_create (name);
6487       if (NILP (Vcode_conversion_reused_workbuf))
6488         Vcode_conversion_reused_workbuf = workbuf;
6489     }
6490   current = current_buffer;
6491   set_buffer_internal (XBUFFER (workbuf));
6492   Ferase_buffer ();
6493   current_buffer->undo_list = Qt;
6494   current_buffer->enable_multibyte_characters = multibyte ? Qt : Qnil;
6495   set_buffer_internal (current);
6496   return workbuf;
6497 }
6498
6499
6500 static Lisp_Object
6501 code_conversion_restore (arg)
6502      Lisp_Object arg;
6503 {
6504   Lisp_Object current, workbuf;
6505
6506   current = XCAR (arg);
6507   workbuf = XCDR (arg);
6508   if (! NILP (workbuf))
6509     {
6510       if (EQ (workbuf, Vcode_conversion_reused_workbuf))
6511         reused_workbuf_in_use = 0;
6512       else if (! NILP (Fbuffer_live_p (workbuf)))
6513         Fkill_buffer (workbuf);
6514     }
6515   set_buffer_internal (XBUFFER (current));
6516   return Qnil;
6517 }
6518
6519 Lisp_Object
6520 code_conversion_save (with_work_buf, multibyte)
6521      int with_work_buf, multibyte;
6522 {
6523   Lisp_Object workbuf = Qnil;
6524
6525   if (with_work_buf)
6526     workbuf = make_conversion_work_buffer (multibyte);
6527   record_unwind_protect (code_conversion_restore,
6528                          Fcons (Fcurrent_buffer (), workbuf));
6529   return workbuf;
6530 }
6531
6532 int
6533 decode_coding_gap (coding, chars, bytes)
6534      struct coding_system *coding;
6535      EMACS_INT chars, bytes;
6536 {
6537   int count = specpdl_ptr - specpdl;
6538   Lisp_Object attrs;
6539
6540   code_conversion_save (0, 0);
6541
6542   coding->src_object = Fcurrent_buffer ();
6543   coding->src_chars = chars;
6544   coding->src_bytes = bytes;
6545   coding->src_pos = -chars;
6546   coding->src_pos_byte = -bytes;
6547   coding->src_multibyte = chars < bytes;
6548   coding->dst_object = coding->src_object;
6549   coding->dst_pos = PT;
6550   coding->dst_pos_byte = PT_BYTE;
6551   coding->dst_multibyte = ! NILP (current_buffer->enable_multibyte_characters);
6552   coding->mode |= CODING_MODE_LAST_BLOCK;
6553
6554   if (CODING_REQUIRE_DETECTION (coding))
6555     detect_coding (coding);
6556
6557   decode_coding (coding);
6558
6559   attrs = CODING_ID_ATTRS (coding->id);
6560   if (! NILP (CODING_ATTR_POST_READ (attrs)))
6561     {
6562       EMACS_INT prev_Z = Z, prev_Z_BYTE = Z_BYTE;
6563       Lisp_Object val;
6564
6565       TEMP_SET_PT_BOTH (coding->dst_pos, coding->dst_pos_byte);
6566       val = call1 (CODING_ATTR_POST_READ (attrs),
6567                    make_number (coding->produced_char));
6568       CHECK_NATNUM (val);
6569       coding->produced_char += Z - prev_Z;
6570       coding->produced += Z_BYTE - prev_Z_BYTE;
6571     }
6572
6573   unbind_to (count, Qnil);
6574   return coding->result;
6575 }
6576
6577 int
6578 encode_coding_gap (coding, chars, bytes)
6579      struct coding_system *coding;
6580      EMACS_INT chars, bytes;
6581 {
6582   int count = specpdl_ptr - specpdl;
6583
6584   code_conversion_save (0, 0);
6585
6586   coding->src_object = Fcurrent_buffer ();
6587   coding->src_chars = chars;
6588   coding->src_bytes = bytes;
6589   coding->src_pos = -chars;
6590   coding->src_pos_byte = -bytes;
6591   coding->src_multibyte = chars < bytes;
6592   coding->dst_object = coding->src_object;
6593   coding->dst_pos = PT;
6594   coding->dst_pos_byte = PT_BYTE;
6595
6596   encode_coding (coding);
6597
6598   unbind_to (count, Qnil);
6599   return coding->result;
6600 }
6601
6602
6603 /* Decode the text in the range FROM/FROM_BYTE and TO/TO_BYTE in
6604    SRC_OBJECT into DST_OBJECT by coding context CODING.
6605
6606    SRC_OBJECT is a buffer, a string, or Qnil.
6607
6608    If it is a buffer, the text is at point of the buffer.  FROM and TO
6609    are positions in the buffer.
6610
6611    If it is a string, the text is at the beginning of the string.
6612    FROM and TO are indices to the string.
6613
6614    If it is nil, the text is at coding->source.  FROM and TO are
6615    indices to coding->source.
6616
6617    DST_OBJECT is a buffer, Qt, or Qnil.
6618
6619    If it is a buffer, the decoded text is inserted at point of the
6620    buffer.  If the buffer is the same as SRC_OBJECT, the source text
6621    is deleted.
6622
6623    If it is Qt, a string is made from the decoded text, and
6624    set in CODING->dst_object.
6625
6626    If it is Qnil, the decoded text is stored at CODING->destination.
6627    The caller must allocate CODING->dst_bytes bytes at
6628    CODING->destination by xmalloc.  If the decoded text is longer than
6629    CODING->dst_bytes, CODING->destination is relocated by xrealloc.
6630  */
6631
6632 void
6633 decode_coding_object (coding, src_object, from, from_byte, to, to_byte,
6634                       dst_object)
6635      struct coding_system *coding;
6636      Lisp_Object src_object;
6637      EMACS_INT from, from_byte, to, to_byte;
6638      Lisp_Object dst_object;
6639 {
6640   int count = specpdl_ptr - specpdl;
6641   unsigned char *destination;
6642   EMACS_INT dst_bytes;
6643   EMACS_INT chars = to - from;
6644   EMACS_INT bytes = to_byte - from_byte;
6645   Lisp_Object attrs;
6646   Lisp_Object buffer;
6647   int saved_pt = -1, saved_pt_byte;
6648
6649   buffer = Fcurrent_buffer ();
6650
6651   if (NILP (dst_object))
6652     {
6653       destination = coding->destination;
6654       dst_bytes = coding->dst_bytes;
6655     }
6656
6657   coding->src_object = src_object;
6658   coding->src_chars = chars;
6659   coding->src_bytes = bytes;
6660   coding->src_multibyte = chars < bytes;
6661
6662   if (STRINGP (src_object))
6663     {
6664       coding->src_pos = from;
6665       coding->src_pos_byte = from_byte;
6666     }
6667   else if (BUFFERP (src_object))
6668     {
6669       set_buffer_internal (XBUFFER (src_object));
6670       if (from != GPT)
6671         move_gap_both (from, from_byte);
6672       if (EQ (src_object, dst_object))
6673         {
6674           saved_pt = PT, saved_pt_byte = PT_BYTE;
6675           TEMP_SET_PT_BOTH (from, from_byte);
6676           del_range_both (from, from_byte, to, to_byte, 1);
6677           coding->src_pos = -chars;
6678           coding->src_pos_byte = -bytes;
6679         }
6680       else
6681         {
6682           coding->src_pos = from;
6683           coding->src_pos_byte = from_byte;
6684         }
6685     }
6686
6687   if (CODING_REQUIRE_DETECTION (coding))
6688     detect_coding (coding);
6689   attrs = CODING_ID_ATTRS (coding->id);
6690
6691   if (EQ (dst_object, Qt)
6692       || (! NILP (CODING_ATTR_POST_READ (attrs))
6693           && NILP (dst_object)))
6694     {
6695       coding->dst_object = code_conversion_save (1, 1);
6696       coding->dst_pos = BEG;
6697       coding->dst_pos_byte = BEG_BYTE;
6698       coding->dst_multibyte = 1;
6699     }
6700   else if (BUFFERP (dst_object))
6701     {
6702       code_conversion_save (0, 0);
6703       coding->dst_object = dst_object;
6704       coding->dst_pos = BUF_PT (XBUFFER (dst_object));
6705       coding->dst_pos_byte = BUF_PT_BYTE (XBUFFER (dst_object));
6706       coding->dst_multibyte
6707         = ! NILP (XBUFFER (dst_object)->enable_multibyte_characters);
6708     }
6709   else
6710     {
6711       code_conversion_save (0, 0);
6712       coding->dst_object = Qnil;
6713       coding->dst_multibyte = 1;
6714     }
6715
6716   decode_coding (coding);
6717
6718   if (BUFFERP (coding->dst_object))
6719     set_buffer_internal (XBUFFER (coding->dst_object));
6720
6721   if (! NILP (CODING_ATTR_POST_READ (attrs)))
6722     {
6723       struct gcpro gcpro1, gcpro2;
6724       EMACS_INT prev_Z = Z, prev_Z_BYTE = Z_BYTE;
6725       Lisp_Object val;
6726
6727       TEMP_SET_PT_BOTH (coding->dst_pos, coding->dst_pos_byte);
6728       GCPRO2 (coding->src_object, coding->dst_object);
6729       val = call1 (CODING_ATTR_POST_READ (attrs),
6730                    make_number (coding->produced_char));
6731       UNGCPRO;
6732       CHECK_NATNUM (val);
6733       coding->produced_char += Z - prev_Z;
6734       coding->produced += Z_BYTE - prev_Z_BYTE;
6735     }
6736
6737   if (EQ (dst_object, Qt))
6738     {
6739       coding->dst_object = Fbuffer_string ();
6740     }
6741   else if (NILP (dst_object) && BUFFERP (coding->dst_object))
6742     {
6743       set_buffer_internal (XBUFFER (coding->dst_object));
6744       if (dst_bytes < coding->produced)
6745         {
6746           destination
6747             = (unsigned char *) xrealloc (destination, coding->produced);
6748           if (! destination)
6749             {
6750               record_conversion_result (coding,
6751                                         CODING_RESULT_INSUFFICIENT_DST);
6752               unbind_to (count, Qnil);
6753               return;
6754             }
6755           if (BEGV < GPT && GPT < BEGV + coding->produced_char)
6756             move_gap_both (BEGV, BEGV_BYTE);
6757           bcopy (BEGV_ADDR, destination, coding->produced);
6758           coding->destination = destination;
6759         }
6760     }
6761
6762   if (saved_pt >= 0)
6763     {
6764       /* This is the case of:
6765          (BUFFERP (src_object) && EQ (src_object, dst_object))
6766          As we have moved PT while replacing the original buffer
6767          contents, we must recover it now.  */
6768       set_buffer_internal (XBUFFER (src_object));
6769       if (saved_pt < from)
6770         TEMP_SET_PT_BOTH (saved_pt, saved_pt_byte);
6771       else if (saved_pt < from + chars)
6772         TEMP_SET_PT_BOTH (from, from_byte);
6773       else if (! NILP (current_buffer->enable_multibyte_characters))
6774         TEMP_SET_PT_BOTH (saved_pt + (coding->produced_char - chars),
6775                           saved_pt_byte + (coding->produced - bytes));
6776       else
6777         TEMP_SET_PT_BOTH (saved_pt + (coding->produced - bytes),
6778                           saved_pt_byte + (coding->produced - bytes));
6779     }
6780
6781   unbind_to (count, coding->dst_object);
6782 }
6783
6784
6785 void
6786 encode_coding_object (coding, src_object, from, from_byte, to, to_byte,
6787                       dst_object)
6788      struct coding_system *coding;
6789      Lisp_Object src_object;
6790      EMACS_INT from, from_byte, to, to_byte;
6791      Lisp_Object dst_object;
6792 {
6793   int count = specpdl_ptr - specpdl;
6794   EMACS_INT chars = to - from;
6795   EMACS_INT bytes = to_byte - from_byte;
6796   Lisp_Object attrs;
6797   Lisp_Object buffer;
6798   int saved_pt = -1, saved_pt_byte;
6799
6800   buffer = Fcurrent_buffer ();
6801
6802   coding->src_object = src_object;
6803   coding->src_chars = chars;
6804   coding->src_bytes = bytes;
6805   coding->src_multibyte = chars < bytes;
6806
6807   attrs = CODING_ID_ATTRS (coding->id);
6808
6809   if (! NILP (CODING_ATTR_PRE_WRITE (attrs)))
6810     {
6811       coding->src_object = code_conversion_save (1, coding->src_multibyte);
6812       set_buffer_internal (XBUFFER (coding->src_object));
6813       if (STRINGP (src_object))
6814         insert_from_string (src_object, from, from_byte, chars, bytes, 0);
6815       else if (BUFFERP (src_object))
6816         insert_from_buffer (XBUFFER (src_object), from, chars, 0);
6817       else
6818         insert_1_both (coding->source + from, chars, bytes, 0, 0, 0);
6819
6820       if (EQ (src_object, dst_object))
6821         {
6822           set_buffer_internal (XBUFFER (src_object));
6823           saved_pt = PT, saved_pt_byte = PT_BYTE;
6824           del_range_both (from, from_byte, to, to_byte, 1);
6825           set_buffer_internal (XBUFFER (coding->src_object));
6826         }
6827
6828       call2 (CODING_ATTR_PRE_WRITE (attrs),
6829              make_number (BEG), make_number (Z));
6830       coding->src_object = Fcurrent_buffer ();
6831       if (BEG != GPT)
6832         move_gap_both (BEG, BEG_BYTE);
6833       coding->src_chars = Z - BEG;
6834       coding->src_bytes = Z_BYTE - BEG_BYTE;
6835       coding->src_pos = BEG;
6836       coding->src_pos_byte = BEG_BYTE;
6837       coding->src_multibyte = Z < Z_BYTE;
6838     }
6839   else if (STRINGP (src_object))
6840     {
6841       code_conversion_save (0, 0);
6842       coding->src_pos = from;
6843       coding->src_pos_byte = from_byte;
6844     }
6845   else if (BUFFERP (src_object))
6846     {
6847       code_conversion_save (0, 0);
6848       set_buffer_internal (XBUFFER (src_object));
6849       if (EQ (src_object, dst_object))
6850         {
6851           saved_pt = PT, saved_pt_byte = PT_BYTE;
6852           coding->src_object = del_range_1 (from, to, 1, 1);
6853           coding->src_pos = 0;
6854           coding->src_pos_byte = 0;
6855         }
6856       else
6857         {
6858           if (from < GPT && to >= GPT)
6859             move_gap_both (from, from_byte);
6860           coding->src_pos = from;
6861           coding->src_pos_byte = from_byte;
6862         }
6863     }
6864   else
6865     code_conversion_save (0, 0);
6866
6867   if (BUFFERP (dst_object))
6868     {
6869       coding->dst_object = dst_object;
6870       if (EQ (src_object, dst_object))
6871         {
6872           coding->dst_pos = from;
6873           coding->dst_pos_byte = from_byte;
6874         }
6875       else
6876         {
6877           coding->dst_pos = BUF_PT (XBUFFER (dst_object));
6878           coding->dst_pos_byte = BUF_PT_BYTE (XBUFFER (dst_object));
6879         }
6880       coding->dst_multibyte
6881         = ! NILP (XBUFFER (dst_object)->enable_multibyte_characters);
6882     }
6883   else if (EQ (dst_object, Qt))
6884     {
6885       coding->dst_object = Qnil;
6886       coding->dst_bytes = coding->src_chars;
6887       if (coding->dst_bytes == 0)
6888         coding->dst_bytes = 1;
6889       coding->destination = (unsigned char *) xmalloc (coding->dst_bytes);
6890       coding->dst_multibyte = 0;
6891     }
6892   else
6893     {
6894       coding->dst_object = Qnil;
6895       coding->dst_multibyte = 0;
6896     }
6897
6898   encode_coding (coding);
6899
6900   if (EQ (dst_object, Qt))
6901     {
6902       if (BUFFERP (coding->dst_object))
6903         coding->dst_object = Fbuffer_string ();
6904       else
6905         {
6906           coding->dst_object
6907             = make_unibyte_string ((char *) coding->destination,
6908                                    coding->produced);
6909           xfree (coding->destination);
6910         }
6911     }
6912
6913   if (saved_pt >= 0)
6914     {
6915       /* This is the case of:
6916          (BUFFERP (src_object) && EQ (src_object, dst_object))
6917          As we have moved PT while replacing the original buffer
6918          contents, we must recover it now.  */
6919       set_buffer_internal (XBUFFER (src_object));
6920       if (saved_pt < from)
6921         TEMP_SET_PT_BOTH (saved_pt, saved_pt_byte);
6922       else if (saved_pt < from + chars)
6923         TEMP_SET_PT_BOTH (from, from_byte);
6924       else if (! NILP (current_buffer->enable_multibyte_characters))
6925         TEMP_SET_PT_BOTH (saved_pt + (coding->produced_char - chars),
6926                           saved_pt_byte + (coding->produced - bytes));
6927       else
6928         TEMP_SET_PT_BOTH (saved_pt + (coding->produced - bytes),
6929                           saved_pt_byte + (coding->produced - bytes));
6930     }
6931
6932   unbind_to (count, Qnil);
6933 }
6934
6935
6936 Lisp_Object
6937 preferred_coding_system ()
6938 {
6939   int id = coding_categories[coding_priorities[0]].id;
6940
6941   return CODING_ID_NAME (id);
6942 }
6943
6944 \f
6945 #ifdef emacs
6946 /*** 8. Emacs Lisp library functions ***/
6947
6948 DEFUN ("coding-system-p", Fcoding_system_p, Scoding_system_p, 1, 1, 0,
6949        doc: /* Return t if OBJECT is nil or a coding-system.
6950 See the documentation of `define-coding-system' for information
6951 about coding-system objects.  */)
6952      (obj)
6953      Lisp_Object obj;
6954 {
6955   return ((NILP (obj) || CODING_SYSTEM_P (obj)) ? Qt : Qnil);
6956 }
6957
6958 DEFUN ("read-non-nil-coding-system", Fread_non_nil_coding_system,
6959        Sread_non_nil_coding_system, 1, 1, 0,
6960        doc: /* Read a coding system from the minibuffer, prompting with string PROMPT.  */)
6961      (prompt)
6962      Lisp_Object prompt;
6963 {
6964   Lisp_Object val;
6965   do
6966     {
6967       val = Fcompleting_read (prompt, Vcoding_system_alist, Qnil,
6968                               Qt, Qnil, Qcoding_system_history, Qnil, Qnil);
6969     }
6970   while (SCHARS (val) == 0);
6971   return (Fintern (val, Qnil));
6972 }
6973
6974 DEFUN ("read-coding-system", Fread_coding_system, Sread_coding_system, 1, 2, 0,
6975        doc: /* Read a coding system from the minibuffer, prompting with string PROMPT.
6976 If the user enters null input, return second argument DEFAULT-CODING-SYSTEM.  */)
6977      (prompt, default_coding_system)
6978      Lisp_Object prompt, default_coding_system;
6979 {
6980   Lisp_Object val;
6981   if (SYMBOLP (default_coding_system))
6982     XSETSTRING (default_coding_system, XPNTR (SYMBOL_NAME (default_coding_system)));
6983   val = Fcompleting_read (prompt, Vcoding_system_alist, Qnil,
6984                           Qt, Qnil, Qcoding_system_history,
6985                           default_coding_system, Qnil);
6986   return (SCHARS (val) == 0 ? Qnil : Fintern (val, Qnil));
6987 }
6988
6989 DEFUN ("check-coding-system", Fcheck_coding_system, Scheck_coding_system,
6990        1, 1, 0,
6991        doc: /* Check validity of CODING-SYSTEM.
6992 If valid, return CODING-SYSTEM, else signal a `coding-system-error' error.  */)
6993   (coding_system)
6994      Lisp_Object coding_system;
6995 {
6996   CHECK_SYMBOL (coding_system);
6997   if (!NILP (Fcoding_system_p (coding_system)))
6998     return coding_system;
6999   while (1)
7000     Fsignal (Qcoding_system_error, Fcons (coding_system, Qnil));
7001 }
7002
7003 \f
7004 /* Detect how the bytes at SRC of length SRC_BYTES are encoded.  If
7005    HIGHEST is nonzero, return the coding system of the highest
7006    priority among the detected coding systems.  Otherwize return a
7007    list of detected coding systems sorted by their priorities.  If
7008    MULTIBYTEP is nonzero, it is assumed that the bytes are in correct
7009    multibyte form but contains only ASCII and eight-bit chars.
7010    Otherwise, the bytes are raw bytes.
7011
7012    CODING-SYSTEM controls the detection as below:
7013
7014    If it is nil, detect both text-format and eol-format.  If the
7015    text-format part of CODING-SYSTEM is already specified
7016    (e.g. `iso-latin-1'), detect only eol-format.  If the eol-format
7017    part of CODING-SYSTEM is already specified (e.g. `undecided-unix'),
7018    detect only text-format.  */
7019
7020 Lisp_Object
7021 detect_coding_system (src, src_chars, src_bytes, highest, multibytep,
7022                       coding_system)
7023      const unsigned char *src;
7024      int src_chars, src_bytes, highest;
7025      int multibytep;
7026      Lisp_Object coding_system;
7027 {
7028   const unsigned char *src_end = src + src_bytes;
7029   Lisp_Object attrs, eol_type;
7030   Lisp_Object val;
7031   struct coding_system coding;
7032   int id;
7033   struct coding_detection_info detect_info;
7034   enum coding_category base_category;
7035
7036   if (NILP (coding_system))
7037     coding_system = Qundecided;
7038   setup_coding_system (coding_system, &coding);
7039   attrs = CODING_ID_ATTRS (coding.id);
7040   eol_type = CODING_ID_EOL_TYPE (coding.id);
7041   coding_system = CODING_ATTR_BASE_NAME (attrs);
7042
7043   coding.source = src;
7044   coding.src_chars = src_chars;
7045   coding.src_bytes = src_bytes;
7046   coding.src_multibyte = multibytep;
7047   coding.consumed = 0;
7048   coding.mode |= CODING_MODE_LAST_BLOCK;
7049
7050   detect_info.checked = detect_info.found = detect_info.rejected = 0;
7051
7052   /* At first, detect text-format if necessary.  */
7053   base_category = XINT (CODING_ATTR_CATEGORY (attrs));
7054   if (base_category == coding_category_undecided)
7055     {
7056       enum coding_category category;
7057       struct coding_system *this;
7058       int c, i;
7059
7060       /* Skip all ASCII bytes except for a few ISO2022 controls.  */
7061       for (i = 0; src < src_end; i++, src++)
7062         {
7063           c = *src;
7064           if (c & 0x80 || (c < 0x20 && (c == ISO_CODE_ESC
7065                                         || c == ISO_CODE_SI
7066                                         || c == ISO_CODE_SO)))
7067             break;
7068         }
7069       coding.head_ascii = src - coding.source;
7070
7071       if (src < src_end)
7072         for (i = 0; i < coding_category_raw_text; i++)
7073           {
7074             category = coding_priorities[i];
7075             this = coding_categories + category;
7076
7077             if (this->id < 0)
7078               {
7079                 /* No coding system of this category is defined.  */
7080                 detect_info.rejected |= (1 << category);
7081               }
7082             else if (category >= coding_category_raw_text)
7083               continue;
7084             else if (detect_info.checked & (1 << category))
7085               {
7086                 if (highest
7087                     && (detect_info.found & (1 << category)))
7088                   break;
7089               }
7090             else
7091               {
7092                 if ((*(this->detector)) (&coding, &detect_info)
7093                     && highest
7094                     && (detect_info.found & (1 << category)))
7095                   {
7096                     if (category == coding_category_utf_16_auto)
7097                       {
7098                         if (detect_info.found & CATEGORY_MASK_UTF_16_LE)
7099                           category = coding_category_utf_16_le;
7100                         else
7101                           category = coding_category_utf_16_be;
7102                       }
7103                     break;
7104                   }
7105               }
7106           }
7107
7108       if (detect_info.rejected == CATEGORY_MASK_ANY)
7109         {
7110           detect_info.found = CATEGORY_MASK_RAW_TEXT;
7111           id = coding_categories[coding_category_raw_text].id;
7112           val = Fcons (make_number (id), Qnil);
7113         }
7114       else if (! detect_info.rejected && ! detect_info.found)
7115         {
7116           detect_info.found = CATEGORY_MASK_ANY;
7117           id = coding_categories[coding_category_undecided].id;
7118           val = Fcons (make_number (id), Qnil);
7119         }
7120       else if (highest)
7121         {
7122           if (detect_info.found)
7123             {
7124               detect_info.found = 1 << category;
7125               val = Fcons (make_number (this->id), Qnil);
7126             }
7127           else
7128             for (i = 0; i < coding_category_raw_text; i++)
7129               if (! (detect_info.rejected & (1 << coding_priorities[i])))
7130                 {
7131                   detect_info.found = 1 << coding_priorities[i];
7132                   id = coding_categories[coding_priorities[i]].id;
7133                   val = Fcons (make_number (id), Qnil);
7134                   break;
7135                 }
7136         }
7137       else
7138         {
7139           int mask = detect_info.rejected | detect_info.found;
7140           int found = 0;
7141           val = Qnil;
7142
7143           for (i = coding_category_raw_text - 1; i >= 0; i--)
7144             {
7145               category = coding_priorities[i];
7146               if (! (mask & (1 << category)))
7147                 {
7148                   found |= 1 << category;
7149                   id = coding_categories[category].id;
7150                   val = Fcons (make_number (id), val);
7151                 }
7152             }
7153           for (i = coding_category_raw_text - 1; i >= 0; i--)
7154             {
7155               category = coding_priorities[i];
7156               if (detect_info.found & (1 << category))
7157                 {
7158                   id = coding_categories[category].id;
7159                   val = Fcons (make_number (id), val);
7160                 }
7161             }
7162           detect_info.found |= found;
7163         }
7164     }
7165   else if (base_category == coding_category_utf_16_auto)
7166     {
7167       if (detect_coding_utf_16 (&coding, &detect_info))
7168         {
7169           enum coding_category category;
7170           struct coding_system *this;
7171
7172           if (detect_info.found & CATEGORY_MASK_UTF_16_LE)
7173             this = coding_categories + coding_category_utf_16_le;
7174           else if (detect_info.found & CATEGORY_MASK_UTF_16_BE)
7175             this = coding_categories + coding_category_utf_16_be;
7176           else if (detect_info.rejected & CATEGORY_MASK_UTF_16_LE_NOSIG)
7177             this = coding_categories + coding_category_utf_16_be_nosig;
7178           else
7179             this = coding_categories + coding_category_utf_16_le_nosig;
7180           val = Fcons (make_number (this->id), Qnil);
7181         }
7182     }
7183   else
7184     {
7185       detect_info.found = 1 << XINT (CODING_ATTR_CATEGORY (attrs));
7186       val = Fcons (make_number (coding.id), Qnil);
7187     }
7188
7189   /* Then, detect eol-format if necessary.  */
7190   {
7191     int normal_eol = -1, utf_16_be_eol = -1, utf_16_le_eol;
7192     Lisp_Object tail;
7193
7194     if (VECTORP (eol_type))
7195       {
7196         if (detect_info.found & ~CATEGORY_MASK_UTF_16)
7197           normal_eol = detect_eol (coding.source, src_bytes,
7198                                    coding_category_raw_text);
7199         if (detect_info.found & (CATEGORY_MASK_UTF_16_BE
7200                                  | CATEGORY_MASK_UTF_16_BE_NOSIG))
7201           utf_16_be_eol = detect_eol (coding.source, src_bytes,
7202                                       coding_category_utf_16_be);
7203         if (detect_info.found & (CATEGORY_MASK_UTF_16_LE
7204                                  | CATEGORY_MASK_UTF_16_LE_NOSIG))
7205           utf_16_le_eol = detect_eol (coding.source, src_bytes,
7206                                       coding_category_utf_16_le);
7207       }
7208     else
7209       {
7210         if (EQ (eol_type, Qunix))
7211           normal_eol = utf_16_be_eol = utf_16_le_eol = EOL_SEEN_LF;
7212         else if (EQ (eol_type, Qdos))
7213           normal_eol = utf_16_be_eol = utf_16_le_eol = EOL_SEEN_CRLF;
7214         else
7215           normal_eol = utf_16_be_eol = utf_16_le_eol = EOL_SEEN_CR;
7216       }
7217
7218     for (tail = val; CONSP (tail); tail = XCDR (tail))
7219       {
7220         enum coding_category category;
7221         int this_eol;
7222
7223         id = XINT (XCAR (tail));
7224         attrs = CODING_ID_ATTRS (id);
7225         category = XINT (CODING_ATTR_CATEGORY (attrs));
7226         eol_type = CODING_ID_EOL_TYPE (id);
7227         if (VECTORP (eol_type))
7228           {
7229             if (category == coding_category_utf_16_be
7230                 || category == coding_category_utf_16_be_nosig)
7231               this_eol = utf_16_be_eol;
7232             else if (category == coding_category_utf_16_le
7233                      || category == coding_category_utf_16_le_nosig)
7234               this_eol = utf_16_le_eol;
7235             else
7236               this_eol = normal_eol;
7237
7238             if (this_eol == EOL_SEEN_LF)
7239               XSETCAR (tail, AREF (eol_type, 0));
7240             else if (this_eol == EOL_SEEN_CRLF)
7241               XSETCAR (tail, AREF (eol_type, 1));
7242             else if (this_eol == EOL_SEEN_CR)
7243               XSETCAR (tail, AREF (eol_type, 2));
7244             else
7245               XSETCAR (tail, CODING_ID_NAME (id));
7246           }
7247         else
7248           XSETCAR (tail, CODING_ID_NAME (id));
7249       }
7250   }
7251
7252   return (highest ? XCAR (val) : val);
7253 }
7254
7255
7256 DEFUN ("detect-coding-region", Fdetect_coding_region, Sdetect_coding_region,
7257        2, 3, 0,
7258        doc: /* Detect coding system of the text in the region between START and END.
7259 Return a list of possible coding systems ordered by priority.
7260
7261 If only ASCII characters are found, it returns a list of single element
7262 `undecided' or its subsidiary coding system according to a detected
7263 end-of-line format.
7264
7265 If optional argument HIGHEST is non-nil, return the coding system of
7266 highest priority.  */)
7267      (start, end, highest)
7268      Lisp_Object start, end, highest;
7269 {
7270   int from, to;
7271   int from_byte, to_byte;
7272
7273   CHECK_NUMBER_COERCE_MARKER (start);
7274   CHECK_NUMBER_COERCE_MARKER (end);
7275
7276   validate_region (&start, &end);
7277   from = XINT (start), to = XINT (end);
7278   from_byte = CHAR_TO_BYTE (from);
7279   to_byte = CHAR_TO_BYTE (to);
7280
7281   if (from < GPT && to >= GPT)
7282     move_gap_both (to, to_byte);
7283
7284   return detect_coding_system (BYTE_POS_ADDR (from_byte),
7285                                to - from, to_byte - from_byte,
7286                                !NILP (highest),
7287                                !NILP (current_buffer
7288                                       ->enable_multibyte_characters),
7289                                Qnil);
7290 }
7291
7292 DEFUN ("detect-coding-string", Fdetect_coding_string, Sdetect_coding_string,
7293        1, 2, 0,
7294        doc: /* Detect coding system of the text in STRING.
7295 Return a list of possible coding systems ordered by priority.
7296
7297 If only ASCII characters are found, it returns a list of single element
7298 `undecided' or its subsidiary coding system according to a detected
7299 end-of-line format.
7300
7301 If optional argument HIGHEST is non-nil, return the coding system of
7302 highest priority.  */)
7303      (string, highest)
7304      Lisp_Object string, highest;
7305 {
7306   CHECK_STRING (string);
7307
7308   return detect_coding_system (SDATA (string),
7309                                SCHARS (string), SBYTES (string),
7310                                !NILP (highest), STRING_MULTIBYTE (string),
7311                                Qnil);
7312 }
7313
7314
7315 static INLINE int
7316 char_encodable_p (c, attrs)
7317      int c;
7318      Lisp_Object attrs;
7319 {
7320   Lisp_Object tail;
7321   struct charset *charset;
7322   Lisp_Object translation_table;
7323
7324   translation_table = CODING_ATTR_TRANS_TBL (attrs);
7325   if (! NILP (translation_table))
7326     c = translate_char (translation_table, c);
7327   for (tail = CODING_ATTR_CHARSET_LIST (attrs);
7328        CONSP (tail); tail = XCDR (tail))
7329     {
7330       charset = CHARSET_FROM_ID (XINT (XCAR (tail)));
7331       if (CHAR_CHARSET_P (c, charset))
7332         break;
7333     }
7334   return (! NILP (tail));
7335 }
7336
7337
7338 /* Return a list of coding systems that safely encode the text between
7339    START and END.  If EXCLUDE is non-nil, it is a list of coding
7340    systems not to check.  The returned list doesn't contain any such
7341    coding systems.  In any case, if the text contains only ASCII or is
7342    unibyte, return t.  */
7343
7344 DEFUN ("find-coding-systems-region-internal",
7345        Ffind_coding_systems_region_internal,
7346        Sfind_coding_systems_region_internal, 2, 3, 0,
7347        doc: /* Internal use only.  */)
7348      (start, end, exclude)
7349      Lisp_Object start, end, exclude;
7350 {
7351   Lisp_Object coding_attrs_list, safe_codings;
7352   EMACS_INT start_byte, end_byte;
7353   const unsigned char *p, *pbeg, *pend;
7354   int c;
7355   Lisp_Object tail, elt;
7356
7357   if (STRINGP (start))
7358     {
7359       if (!STRING_MULTIBYTE (start)
7360           || SCHARS (start) == SBYTES (start))
7361         return Qt;
7362       start_byte = 0;
7363       end_byte = SBYTES (start);
7364     }
7365   else
7366     {
7367       CHECK_NUMBER_COERCE_MARKER (start);
7368       CHECK_NUMBER_COERCE_MARKER (end);
7369       if (XINT (start) < BEG || XINT (end) > Z || XINT (start) > XINT (end))
7370         args_out_of_range (start, end);
7371       if (NILP (current_buffer->enable_multibyte_characters))
7372         return Qt;
7373       start_byte = CHAR_TO_BYTE (XINT (start));
7374       end_byte = CHAR_TO_BYTE (XINT (end));
7375       if (XINT (end) - XINT (start) == end_byte - start_byte)
7376         return Qt;
7377
7378       if (XINT (start) < GPT && XINT (end) > GPT)
7379         {
7380           if ((GPT - XINT (start)) < (XINT (end) - GPT))
7381             move_gap_both (XINT (start), start_byte);
7382           else
7383             move_gap_both (XINT (end), end_byte);
7384         }
7385     }
7386
7387   coding_attrs_list = Qnil;
7388   for (tail = Vcoding_system_list; CONSP (tail); tail = XCDR (tail))
7389     if (NILP (exclude)
7390         || NILP (Fmemq (XCAR (tail), exclude)))
7391       {
7392         Lisp_Object attrs;
7393
7394         attrs = AREF (CODING_SYSTEM_SPEC (XCAR (tail)), 0);
7395         if (EQ (XCAR (tail), CODING_ATTR_BASE_NAME (attrs))
7396             && ! EQ (CODING_ATTR_TYPE (attrs), Qundecided))
7397           {
7398             ASET (attrs, coding_attr_trans_tbl,
7399                   get_translation_table (attrs, 1, NULL));
7400             coding_attrs_list = Fcons (attrs, coding_attrs_list);
7401           }
7402       }
7403
7404   if (STRINGP (start))
7405     p = pbeg = SDATA (start);
7406   else
7407     p = pbeg = BYTE_POS_ADDR (start_byte);
7408   pend = p + (end_byte - start_byte);
7409
7410   while (p < pend && ASCII_BYTE_P (*p)) p++;
7411   while (p < pend && ASCII_BYTE_P (*(pend - 1))) pend--;
7412
7413   while (p < pend)
7414     {
7415       if (ASCII_BYTE_P (*p))
7416         p++;
7417       else
7418         {
7419           c = STRING_CHAR_ADVANCE (p);
7420
7421           charset_map_loaded = 0;
7422           for (tail = coding_attrs_list; CONSP (tail);)
7423             {
7424               elt = XCAR (tail);
7425               if (NILP (elt))
7426                 tail = XCDR (tail);
7427               else if (char_encodable_p (c, elt))
7428                 tail = XCDR (tail);
7429               else if (CONSP (XCDR (tail)))
7430                 {
7431                   XSETCAR (tail, XCAR (XCDR (tail)));
7432                   XSETCDR (tail, XCDR (XCDR (tail)));
7433                 }
7434               else
7435                 {
7436                   XSETCAR (tail, Qnil);
7437                   tail = XCDR (tail);
7438                 }
7439             }
7440           if (charset_map_loaded)
7441             {
7442               EMACS_INT p_offset = p - pbeg, pend_offset = pend - pbeg;
7443
7444               if (STRINGP (start))
7445                 pbeg = SDATA (start);
7446               else
7447                 pbeg = BYTE_POS_ADDR (start_byte);
7448               p = pbeg + p_offset;
7449               pend = pbeg + pend_offset;
7450             }
7451         }
7452     }
7453
7454   safe_codings = Qnil;
7455   for (tail = coding_attrs_list; CONSP (tail); tail = XCDR (tail))
7456     if (! NILP (XCAR (tail)))
7457       safe_codings = Fcons (CODING_ATTR_BASE_NAME (XCAR (tail)), safe_codings);
7458
7459   return safe_codings;
7460 }
7461
7462
7463 DEFUN ("unencodable-char-position", Funencodable_char_position,
7464        Sunencodable_char_position, 3, 5, 0,
7465        doc: /*
7466 Return position of first un-encodable character in a region.
7467 START and END specfiy the region and CODING-SYSTEM specifies the
7468 encoding to check.  Return nil if CODING-SYSTEM does encode the region.
7469
7470 If optional 4th argument COUNT is non-nil, it specifies at most how
7471 many un-encodable characters to search.  In this case, the value is a
7472 list of positions.
7473
7474 If optional 5th argument STRING is non-nil, it is a string to search
7475 for un-encodable characters.  In that case, START and END are indexes
7476 to the string.  */)
7477      (start, end, coding_system, count, string)
7478      Lisp_Object start, end, coding_system, count, string;
7479 {
7480   int n;
7481   struct coding_system coding;
7482   Lisp_Object attrs, charset_list, translation_table;
7483   Lisp_Object positions;
7484   int from, to;
7485   const unsigned char *p, *stop, *pend;
7486   int ascii_compatible;
7487
7488   setup_coding_system (Fcheck_coding_system (coding_system), &coding);
7489   attrs = CODING_ID_ATTRS (coding.id);
7490   if (EQ (CODING_ATTR_TYPE (attrs), Qraw_text))
7491     return Qnil;
7492   ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
7493   charset_list = CODING_ATTR_CHARSET_LIST (attrs);
7494   translation_table = get_translation_table (attrs, 1, NULL);
7495
7496   if (NILP (string))
7497     {
7498       validate_region (&start, &end);
7499       from = XINT (start);
7500       to = XINT (end);
7501       if (NILP (current_buffer->enable_multibyte_characters)
7502           || (ascii_compatible
7503               && (to - from) == (CHAR_TO_BYTE (to) - (CHAR_TO_BYTE (from)))))
7504         return Qnil;
7505       p = CHAR_POS_ADDR (from);
7506       pend = CHAR_POS_ADDR (to);
7507       if (from < GPT && to >= GPT)
7508         stop = GPT_ADDR;
7509       else
7510         stop = pend;
7511     }
7512   else
7513     {
7514       CHECK_STRING (string);
7515       CHECK_NATNUM (start);
7516       CHECK_NATNUM (end);
7517       from = XINT (start);
7518       to = XINT (end);
7519       if (from > to
7520           || to > SCHARS (string))
7521         args_out_of_range_3 (string, start, end);
7522       if (! STRING_MULTIBYTE (string))
7523         return Qnil;
7524       p = SDATA (string) + string_char_to_byte (string, from);
7525       stop = pend = SDATA (string) + string_char_to_byte (string, to);
7526       if (ascii_compatible && (to - from) == (pend - p))
7527         return Qnil;
7528     }
7529
7530   if (NILP (count))
7531     n = 1;
7532   else
7533     {
7534       CHECK_NATNUM (count);
7535       n = XINT (count);
7536     }
7537
7538   positions = Qnil;
7539   while (1)
7540     {
7541       int c;
7542
7543       if (ascii_compatible)
7544         while (p < stop && ASCII_BYTE_P (*p))
7545           p++, from++;
7546       if (p >= stop)
7547         {
7548           if (p >= pend)
7549             break;
7550           stop = pend;
7551           p = GAP_END_ADDR;
7552         }
7553
7554       c = STRING_CHAR_ADVANCE (p);
7555       if (! (ASCII_CHAR_P (c) && ascii_compatible)
7556           && ! char_charset (translate_char (translation_table, c),
7557                              charset_list, NULL))
7558         {
7559           positions = Fcons (make_number (from), positions);
7560           n--;
7561           if (n == 0)
7562             break;
7563         }
7564
7565       from++;
7566     }
7567
7568   return (NILP (count) ? Fcar (positions) : Fnreverse (positions));
7569 }
7570
7571
7572 DEFUN ("check-coding-systems-region", Fcheck_coding_systems_region,
7573        Scheck_coding_systems_region, 3, 3, 0,
7574        doc: /* Check if the region is encodable by coding systems.
7575
7576 START and END are buffer positions specifying the region.
7577 CODING-SYSTEM-LIST is a list of coding systems to check.
7578
7579 The value is an alist ((CODING-SYSTEM POS0 POS1 ...) ...), where
7580 CODING-SYSTEM is a member of CODING-SYSTEM-LIst and can't encode the
7581 whole region, POS0, POS1, ... are buffer positions where non-encodable
7582 characters are found.
7583
7584 If all coding systems in CODING-SYSTEM-LIST can encode the region, the
7585 value is nil.
7586
7587 START may be a string.  In that case, check if the string is
7588 encodable, and the value contains indices to the string instead of
7589 buffer positions.  END is ignored.  */)
7590      (start, end, coding_system_list)
7591      Lisp_Object start, end, coding_system_list;
7592 {
7593   Lisp_Object list;
7594   EMACS_INT start_byte, end_byte;
7595   int pos;
7596   const unsigned char *p, *pbeg, *pend;
7597   int c;
7598   Lisp_Object tail, elt, attrs;
7599
7600   if (STRINGP (start))
7601     {
7602       if (!STRING_MULTIBYTE (start)
7603           && SCHARS (start) != SBYTES (start))
7604         return Qnil;
7605       start_byte = 0;
7606       end_byte = SBYTES (start);
7607       pos = 0;
7608     }
7609   else
7610     {
7611       CHECK_NUMBER_COERCE_MARKER (start);
7612       CHECK_NUMBER_COERCE_MARKER (end);
7613       if (XINT (start) < BEG || XINT (end) > Z || XINT (start) > XINT (end))
7614         args_out_of_range (start, end);
7615       if (NILP (current_buffer->enable_multibyte_characters))
7616         return Qnil;
7617       start_byte = CHAR_TO_BYTE (XINT (start));
7618       end_byte = CHAR_TO_BYTE (XINT (end));
7619       if (XINT (end) - XINT (start) == end_byte - start_byte)
7620         return Qt;
7621
7622       if (XINT (start) < GPT && XINT (end) > GPT)
7623         {
7624           if ((GPT - XINT (start)) < (XINT (end) - GPT))
7625             move_gap_both (XINT (start), start_byte);
7626           else
7627             move_gap_both (XINT (end), end_byte);
7628         }
7629       pos = XINT (start);
7630     }
7631
7632   list = Qnil;
7633   for (tail = coding_system_list; CONSP (tail); tail = XCDR (tail))
7634     {
7635       elt = XCAR (tail);
7636       attrs = AREF (CODING_SYSTEM_SPEC (elt), 0);
7637       ASET (attrs, coding_attr_trans_tbl,
7638             get_translation_table (attrs, 1, NULL));
7639       list = Fcons (Fcons (elt, Fcons (attrs, Qnil)), list);
7640     }
7641
7642   if (STRINGP (start))
7643     p = pbeg = SDATA (start);
7644   else
7645     p = pbeg = BYTE_POS_ADDR (start_byte);
7646   pend = p + (end_byte - start_byte);
7647
7648   while (p < pend && ASCII_BYTE_P (*p)) p++, pos++;
7649   while (p < pend && ASCII_BYTE_P (*(pend - 1))) pend--;
7650
7651   while (p < pend)
7652     {
7653       if (ASCII_BYTE_P (*p))
7654         p++;
7655       else
7656         {
7657           c = STRING_CHAR_ADVANCE (p);
7658
7659           charset_map_loaded = 0;
7660           for (tail = list; CONSP (tail); tail = XCDR (tail))
7661             {
7662               elt = XCDR (XCAR (tail));
7663               if (! char_encodable_p (c, XCAR (elt)))
7664                 XSETCDR (elt, Fcons (make_number (pos), XCDR (elt)));
7665             }
7666           if (charset_map_loaded)
7667             {
7668               EMACS_INT p_offset = p - pbeg, pend_offset = pend - pbeg;
7669
7670               if (STRINGP (start))
7671                 pbeg = SDATA (start);
7672               else
7673                 pbeg = BYTE_POS_ADDR (start_byte);
7674               p = pbeg + p_offset;
7675               pend = pbeg + pend_offset;
7676             }
7677         }
7678       pos++;
7679     }
7680
7681   tail = list;
7682   list = Qnil;
7683   for (; CONSP (tail); tail = XCDR (tail))
7684     {
7685       elt = XCAR (tail);
7686       if (CONSP (XCDR (XCDR (elt))))
7687         list = Fcons (Fcons (XCAR (elt), Fnreverse (XCDR (XCDR (elt)))),
7688                       list);
7689     }
7690
7691   return list;
7692 }
7693
7694
7695 Lisp_Object
7696 code_convert_region (start, end, coding_system, dst_object, encodep, norecord)
7697      Lisp_Object start, end, coding_system, dst_object;
7698      int encodep, norecord;
7699 {
7700   struct coding_system coding;
7701   EMACS_INT from, from_byte, to, to_byte;
7702   Lisp_Object src_object;
7703
7704   CHECK_NUMBER_COERCE_MARKER (start);
7705   CHECK_NUMBER_COERCE_MARKER (end);
7706   if (NILP (coding_system))
7707     coding_system = Qno_conversion;
7708   else
7709     CHECK_CODING_SYSTEM (coding_system);
7710   src_object = Fcurrent_buffer ();
7711   if (NILP (dst_object))
7712     dst_object = src_object;
7713   else if (! EQ (dst_object, Qt))
7714     CHECK_BUFFER (dst_object);
7715
7716   validate_region (&start, &end);
7717   from = XFASTINT (start);
7718   from_byte = CHAR_TO_BYTE (from);
7719   to = XFASTINT (end);
7720   to_byte = CHAR_TO_BYTE (to);
7721
7722   setup_coding_system (coding_system, &coding);
7723   coding.mode |= CODING_MODE_LAST_BLOCK;
7724
7725   if (encodep)
7726     encode_coding_object (&coding, src_object, from, from_byte, to, to_byte,
7727                           dst_object);
7728   else
7729     decode_coding_object (&coding, src_object, from, from_byte, to, to_byte,
7730                           dst_object);
7731   if (! norecord)
7732     Vlast_coding_system_used = CODING_ID_NAME (coding.id);
7733
7734   return (BUFFERP (dst_object)
7735           ? make_number (coding.produced_char)
7736           : coding.dst_object);
7737 }
7738
7739
7740 DEFUN ("decode-coding-region", Fdecode_coding_region, Sdecode_coding_region,
7741        3, 4, "r\nzCoding system: ",
7742        doc: /* Decode the current region from the specified coding system.
7743 When called from a program, takes four arguments:
7744         START, END, CODING-SYSTEM, and DESTINATION.
7745 START and END are buffer positions.
7746
7747 Optional 4th arguments DESTINATION specifies where the decoded text goes.
7748 If nil, the region between START and END is replace by the decoded text.
7749 If buffer, the decoded text is inserted in the buffer.
7750 If t, the decoded text is returned.
7751
7752 This function sets `last-coding-system-used' to the precise coding system
7753 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
7754 not fully specified.)
7755 It returns the length of the decoded text.  */)
7756      (start, end, coding_system, destination)
7757      Lisp_Object start, end, coding_system, destination;
7758 {
7759   return code_convert_region (start, end, coding_system, destination, 0, 0);
7760 }
7761
7762 DEFUN ("encode-coding-region", Fencode_coding_region, Sencode_coding_region,
7763        3, 4, "r\nzCoding system: ",
7764        doc: /* Encode the current region by specified coding system.
7765 When called from a program, takes three arguments:
7766 START, END, and CODING-SYSTEM.  START and END are buffer positions.
7767
7768 Optional 4th arguments DESTINATION specifies where the encoded text goes.
7769 If nil, the region between START and END is replace by the encoded text.
7770 If buffer, the encoded text is inserted in the buffer.
7771 If t, the encoded text is returned.
7772
7773 This function sets `last-coding-system-used' to the precise coding system
7774 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
7775 not fully specified.)
7776 It returns the length of the encoded text.  */)
7777   (start, end, coding_system, destination)
7778      Lisp_Object start, end, coding_system, destination;
7779 {
7780   return code_convert_region (start, end, coding_system, destination, 1, 0);
7781 }
7782
7783 Lisp_Object
7784 code_convert_string (string, coding_system, dst_object,
7785                      encodep, nocopy, norecord)
7786      Lisp_Object string, coding_system, dst_object;
7787      int encodep, nocopy, norecord;
7788 {
7789   struct coding_system coding;
7790   EMACS_INT chars, bytes;
7791
7792   CHECK_STRING (string);
7793   if (NILP (coding_system))
7794     {
7795       if (! norecord)
7796         Vlast_coding_system_used = Qno_conversion;
7797       if (NILP (dst_object))
7798         return (nocopy ? Fcopy_sequence (string) : string);
7799     }
7800
7801   if (NILP (coding_system))
7802     coding_system = Qno_conversion;
7803   else
7804     CHECK_CODING_SYSTEM (coding_system);
7805   if (NILP (dst_object))
7806     dst_object = Qt;
7807   else if (! EQ (dst_object, Qt))
7808     CHECK_BUFFER (dst_object);
7809
7810   setup_coding_system (coding_system, &coding);
7811   coding.mode |= CODING_MODE_LAST_BLOCK;
7812   chars = SCHARS (string);
7813   bytes = SBYTES (string);
7814   if (encodep)
7815     encode_coding_object (&coding, string, 0, 0, chars, bytes, dst_object);
7816   else
7817     decode_coding_object (&coding, string, 0, 0, chars, bytes, dst_object);
7818   if (! norecord)
7819     Vlast_coding_system_used = CODING_ID_NAME (coding.id);
7820
7821   return (BUFFERP (dst_object)
7822           ? make_number (coding.produced_char)
7823           : coding.dst_object);
7824 }
7825
7826
7827 /* Encode or decode STRING according to CODING_SYSTEM.
7828    Do not set Vlast_coding_system_used.
7829
7830    This function is called only from macros DECODE_FILE and
7831    ENCODE_FILE, thus we ignore character composition.  */
7832
7833 Lisp_Object
7834 code_convert_string_norecord (string, coding_system, encodep)
7835      Lisp_Object string, coding_system;
7836      int encodep;
7837 {
7838   return code_convert_string (string, coding_system, Qt, encodep, 0, 1);
7839 }
7840
7841
7842 DEFUN ("decode-coding-string", Fdecode_coding_string, Sdecode_coding_string,
7843        2, 4, 0,
7844        doc: /* Decode STRING which is encoded in CODING-SYSTEM, and return the result.
7845
7846 Optional third arg NOCOPY non-nil means it is OK to return STRING itself
7847 if the decoding operation is trivial.
7848
7849 Optional fourth arg BUFFER non-nil meant that the decoded text is
7850 inserted in BUFFER instead of returned as a string.  In this case,
7851 the return value is BUFFER.
7852
7853 This function sets `last-coding-system-used' to the precise coding system
7854 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
7855 not fully specified.  */)
7856   (string, coding_system, nocopy, buffer)
7857      Lisp_Object string, coding_system, nocopy, buffer;
7858 {
7859   return code_convert_string (string, coding_system, buffer,
7860                               0, ! NILP (nocopy), 0);
7861 }
7862
7863 DEFUN ("encode-coding-string", Fencode_coding_string, Sencode_coding_string,
7864        2, 4, 0,
7865        doc: /* Encode STRING to CODING-SYSTEM, and return the result.
7866
7867 Optional third arg NOCOPY non-nil means it is OK to return STRING
7868 itself if the encoding operation is trivial.
7869
7870 Optional fourth arg BUFFER non-nil meant that the encoded text is
7871 inserted in BUFFER instead of returned as a string.  In this case,
7872 the return value is BUFFER.
7873
7874 This function sets `last-coding-system-used' to the precise coding system
7875 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
7876 not fully specified.)  */)
7877      (string, coding_system, nocopy, buffer)
7878      Lisp_Object string, coding_system, nocopy, buffer;
7879 {
7880   return code_convert_string (string, coding_system, buffer,
7881                               1, ! NILP (nocopy), 1);
7882 }
7883
7884 \f
7885 DEFUN ("decode-sjis-char", Fdecode_sjis_char, Sdecode_sjis_char, 1, 1, 0,
7886        doc: /* Decode a Japanese character which has CODE in shift_jis encoding.
7887 Return the corresponding character.  */)
7888      (code)
7889      Lisp_Object code;
7890 {
7891   Lisp_Object spec, attrs, val;
7892   struct charset *charset_roman, *charset_kanji, *charset_kana, *charset;
7893   int c;
7894
7895   CHECK_NATNUM (code);
7896   c = XFASTINT (code);
7897   CHECK_CODING_SYSTEM_GET_SPEC (Vsjis_coding_system, spec);
7898   attrs = AREF (spec, 0);
7899
7900   if (ASCII_BYTE_P (c)
7901       && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
7902     return code;
7903
7904   val = CODING_ATTR_CHARSET_LIST (attrs);
7905   charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
7906   charset_kana = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
7907   charset_kanji = CHARSET_FROM_ID (XINT (XCAR (val)));
7908
7909   if (c <= 0x7F)
7910     charset = charset_roman;
7911   else if (c >= 0xA0 && c < 0xDF)
7912     {
7913       charset = charset_kana;
7914       c -= 0x80;
7915     }
7916   else
7917     {
7918       int s1 = c >> 8, s2 = c & 0xFF;
7919
7920       if (s1 < 0x81 || (s1 > 0x9F && s1 < 0xE0) || s1 > 0xEF
7921           || s2 < 0x40 || s2 == 0x7F || s2 > 0xFC)
7922         error ("Invalid code: %d", code);
7923       SJIS_TO_JIS (c);
7924       charset = charset_kanji;
7925     }
7926   c = DECODE_CHAR (charset, c);
7927   if (c < 0)
7928     error ("Invalid code: %d", code);
7929   return make_number (c);
7930 }
7931
7932
7933 DEFUN ("encode-sjis-char", Fencode_sjis_char, Sencode_sjis_char, 1, 1, 0,
7934        doc: /* Encode a Japanese character CHAR to shift_jis encoding.
7935 Return the corresponding code in SJIS.  */)
7936      (ch)
7937     Lisp_Object ch;
7938 {
7939   Lisp_Object spec, attrs, charset_list;
7940   int c;
7941   struct charset *charset;
7942   unsigned code;
7943
7944   CHECK_CHARACTER (ch);
7945   c = XFASTINT (ch);
7946   CHECK_CODING_SYSTEM_GET_SPEC (Vsjis_coding_system, spec);
7947   attrs = AREF (spec, 0);
7948
7949   if (ASCII_CHAR_P (c)
7950       && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
7951     return ch;
7952
7953   charset_list = CODING_ATTR_CHARSET_LIST (attrs);
7954   charset = char_charset (c, charset_list, &code);
7955   if (code == CHARSET_INVALID_CODE (charset))
7956     error ("Can't encode by shift_jis encoding: %d", c);
7957   JIS_TO_SJIS (code);
7958
7959   return make_number (code);
7960 }
7961
7962 DEFUN ("decode-big5-char", Fdecode_big5_char, Sdecode_big5_char, 1, 1, 0,
7963        doc: /* Decode a Big5 character which has CODE in BIG5 coding system.
7964 Return the corresponding character.  */)
7965      (code)
7966      Lisp_Object code;
7967 {
7968   Lisp_Object spec, attrs, val;
7969   struct charset *charset_roman, *charset_big5, *charset;
7970   int c;
7971
7972   CHECK_NATNUM (code);
7973   c = XFASTINT (code);
7974   CHECK_CODING_SYSTEM_GET_SPEC (Vbig5_coding_system, spec);
7975   attrs = AREF (spec, 0);
7976
7977   if (ASCII_BYTE_P (c)
7978       && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
7979     return code;
7980
7981   val = CODING_ATTR_CHARSET_LIST (attrs);
7982   charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
7983   charset_big5 = CHARSET_FROM_ID (XINT (XCAR (val)));
7984
7985   if (c <= 0x7F)
7986     charset = charset_roman;
7987   else
7988     {
7989       int b1 = c >> 8, b2 = c & 0x7F;
7990       if (b1 < 0xA1 || b1 > 0xFE
7991           || b2 < 0x40 || (b2 > 0x7E && b2 < 0xA1) || b2 > 0xFE)
7992         error ("Invalid code: %d", code);
7993       charset = charset_big5;
7994     }
7995   c = DECODE_CHAR (charset, (unsigned )c);
7996   if (c < 0)
7997     error ("Invalid code: %d", code);
7998   return make_number (c);
7999 }
8000
8001 DEFUN ("encode-big5-char", Fencode_big5_char, Sencode_big5_char, 1, 1, 0,
8002        doc: /* Encode the Big5 character CHAR to BIG5 coding system.
8003 Return the corresponding character code in Big5.  */)
8004      (ch)
8005      Lisp_Object ch;
8006 {
8007   Lisp_Object spec, attrs, charset_list;
8008   struct charset *charset;
8009   int c;
8010   unsigned code;
8011
8012   CHECK_CHARACTER (ch);
8013   c = XFASTINT (ch);
8014   CHECK_CODING_SYSTEM_GET_SPEC (Vbig5_coding_system, spec);
8015   attrs = AREF (spec, 0);
8016   if (ASCII_CHAR_P (c)
8017       && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
8018     return ch;
8019
8020   charset_list = CODING_ATTR_CHARSET_LIST (attrs);
8021   charset = char_charset (c, charset_list, &code);
8022   if (code == CHARSET_INVALID_CODE (charset))
8023     error ("Can't encode by Big5 encoding: %d", c);
8024
8025   return make_number (code);
8026 }
8027
8028 \f
8029 DEFUN ("set-terminal-coding-system-internal",
8030        Fset_terminal_coding_system_internal,
8031        Sset_terminal_coding_system_internal, 1, 1, 0,
8032        doc: /* Internal use only.  */)
8033      (coding_system)
8034      Lisp_Object coding_system;
8035 {
8036   CHECK_SYMBOL (coding_system);
8037   setup_coding_system (Fcheck_coding_system (coding_system),
8038                         &terminal_coding);
8039
8040   /* We had better not send unsafe characters to terminal.  */
8041   terminal_coding.mode |= CODING_MODE_SAFE_ENCODING;
8042   /* Characer composition should be disabled.  */
8043   terminal_coding.common_flags &= ~CODING_ANNOTATE_COMPOSITION_MASK;
8044   terminal_coding.src_multibyte = 1;
8045   terminal_coding.dst_multibyte = 0;
8046   return Qnil;
8047 }
8048
8049 DEFUN ("set-safe-terminal-coding-system-internal",
8050        Fset_safe_terminal_coding_system_internal,
8051        Sset_safe_terminal_coding_system_internal, 1, 1, 0,
8052        doc: /* Internal use only.  */)
8053      (coding_system)
8054      Lisp_Object coding_system;
8055 {
8056   CHECK_SYMBOL (coding_system);
8057   setup_coding_system (Fcheck_coding_system (coding_system),
8058                        &safe_terminal_coding);
8059   /* Characer composition should be disabled.  */
8060   safe_terminal_coding.common_flags &= ~CODING_ANNOTATE_COMPOSITION_MASK;
8061   safe_terminal_coding.src_multibyte = 1;
8062   safe_terminal_coding.dst_multibyte = 0;
8063   return Qnil;
8064 }
8065
8066 DEFUN ("terminal-coding-system",
8067        Fterminal_coding_system, Sterminal_coding_system, 0, 0, 0,
8068        doc: /* Return coding system specified for terminal output.  */)
8069      ()
8070 {
8071   return CODING_ID_NAME (terminal_coding.id);
8072 }
8073
8074 DEFUN ("set-keyboard-coding-system-internal",
8075        Fset_keyboard_coding_system_internal,
8076        Sset_keyboard_coding_system_internal, 1, 1, 0,
8077        doc: /* Internal use only.  */)
8078      (coding_system)
8079      Lisp_Object coding_system;
8080 {
8081   CHECK_SYMBOL (coding_system);
8082   setup_coding_system (Fcheck_coding_system (coding_system),
8083                        &keyboard_coding);
8084   /* Characer composition should be disabled.  */
8085   keyboard_coding.common_flags &= ~CODING_ANNOTATE_COMPOSITION_MASK;
8086   return Qnil;
8087 }
8088
8089 DEFUN ("keyboard-coding-system",
8090        Fkeyboard_coding_system, Skeyboard_coding_system, 0, 0, 0,
8091        doc: /* Return coding system specified for decoding keyboard input.  */)
8092      ()
8093 {
8094   return CODING_ID_NAME (keyboard_coding.id);
8095 }
8096
8097 \f
8098 DEFUN ("find-operation-coding-system", Ffind_operation_coding_system,
8099        Sfind_operation_coding_system,  1, MANY, 0,
8100        doc: /* Choose a coding system for an operation based on the target name.
8101 The value names a pair of coding systems: (DECODING-SYSTEM . ENCODING-SYSTEM).
8102 DECODING-SYSTEM is the coding system to use for decoding
8103 \(in case OPERATION does decoding), and ENCODING-SYSTEM is the coding system
8104 for encoding (in case OPERATION does encoding).
8105
8106 The first argument OPERATION specifies an I/O primitive:
8107   For file I/O, `insert-file-contents' or `write-region'.
8108   For process I/O, `call-process', `call-process-region', or `start-process'.
8109   For network I/O, `open-network-stream'.
8110
8111 The remaining arguments should be the same arguments that were passed
8112 to the primitive.  Depending on which primitive, one of those arguments
8113 is selected as the TARGET.  For example, if OPERATION does file I/O,
8114 whichever argument specifies the file name is TARGET.
8115
8116 TARGET has a meaning which depends on OPERATION:
8117   For file I/O, TARGET is a file name.
8118   For process I/O, TARGET is a process name.
8119   For network I/O, TARGET is a service name or a port number
8120
8121 This function looks up what specified for TARGET in,
8122 `file-coding-system-alist', `process-coding-system-alist',
8123 or `network-coding-system-alist' depending on OPERATION.
8124 They may specify a coding system, a cons of coding systems,
8125 or a function symbol to call.
8126 In the last case, we call the function with one argument,
8127 which is a list of all the arguments given to this function.
8128
8129 usage: (find-operation-coding-system OPERATION ARGUMENTS ...)  */)
8130      (nargs, args)
8131      int nargs;
8132      Lisp_Object *args;
8133 {
8134   Lisp_Object operation, target_idx, target, val;
8135   register Lisp_Object chain;
8136
8137   if (nargs < 2)
8138     error ("Too few arguments");
8139   operation = args[0];
8140   if (!SYMBOLP (operation)
8141       || !INTEGERP (target_idx = Fget (operation, Qtarget_idx)))
8142     error ("Invalid first arguement");
8143   if (nargs < 1 + XINT (target_idx))
8144     error ("Too few arguments for operation: %s",
8145            SDATA (SYMBOL_NAME (operation)));
8146   target = args[XINT (target_idx) + 1];
8147   if (!(STRINGP (target)
8148         || (EQ (operation, Qopen_network_stream) && INTEGERP (target))))
8149     error ("Invalid %dth argument", XINT (target_idx) + 1);
8150
8151   chain = ((EQ (operation, Qinsert_file_contents)
8152             || EQ (operation, Qwrite_region))
8153            ? Vfile_coding_system_alist
8154            : (EQ (operation, Qopen_network_stream)
8155               ? Vnetwork_coding_system_alist
8156               : Vprocess_coding_system_alist));
8157   if (NILP (chain))
8158     return Qnil;
8159
8160   for (; CONSP (chain); chain = XCDR (chain))
8161     {
8162       Lisp_Object elt;
8163
8164       elt = XCAR (chain);
8165       if (CONSP (elt)
8166           && ((STRINGP (target)
8167                && STRINGP (XCAR (elt))
8168                && fast_string_match (XCAR (elt), target) >= 0)
8169               || (INTEGERP (target) && EQ (target, XCAR (elt)))))
8170         {
8171           val = XCDR (elt);
8172           /* Here, if VAL is both a valid coding system and a valid
8173              function symbol, we return VAL as a coding system.  */
8174           if (CONSP (val))
8175             return val;
8176           if (! SYMBOLP (val))
8177             return Qnil;
8178           if (! NILP (Fcoding_system_p (val)))
8179             return Fcons (val, val);
8180           if (! NILP (Ffboundp (val)))
8181             {
8182               val = call1 (val, Flist (nargs, args));
8183               if (CONSP (val))
8184                 return val;
8185               if (SYMBOLP (val) && ! NILP (Fcoding_system_p (val)))
8186                 return Fcons (val, val);
8187             }
8188           return Qnil;
8189         }
8190     }
8191   return Qnil;
8192 }
8193
8194 DEFUN ("set-coding-system-priority", Fset_coding_system_priority,
8195        Sset_coding_system_priority, 0, MANY, 0,
8196        doc: /* Assign higher priority to the coding systems given as arguments.
8197 If multiple coding systems belongs to the same category,
8198 all but the first one are ignored.
8199
8200 usage: (set-coding-system-priority ...)  */)
8201      (nargs, args)
8202      int nargs;
8203      Lisp_Object *args;
8204 {
8205   int i, j;
8206   int changed[coding_category_max];
8207   enum coding_category priorities[coding_category_max];
8208
8209   bzero (changed, sizeof changed);
8210
8211   for (i = j = 0; i < nargs; i++)
8212     {
8213       enum coding_category category;
8214       Lisp_Object spec, attrs;
8215
8216       CHECK_CODING_SYSTEM_GET_SPEC (args[i], spec);
8217       attrs = AREF (spec, 0);
8218       category = XINT (CODING_ATTR_CATEGORY (attrs));
8219       if (changed[category])
8220         /* Ignore this coding system because a coding system of the
8221            same category already had a higher priority.  */
8222         continue;
8223       changed[category] = 1;
8224       priorities[j++] = category;
8225       if (coding_categories[category].id >= 0
8226           && ! EQ (args[i], CODING_ID_NAME (coding_categories[category].id)))
8227         setup_coding_system (args[i], &coding_categories[category]);
8228       Fset (AREF (Vcoding_category_table, category), args[i]);
8229     }
8230
8231   /* Now we have decided top J priorities.  Reflect the order of the
8232      original priorities to the remaining priorities.  */
8233
8234   for (i = j, j = 0; i < coding_category_max; i++, j++)
8235     {
8236       while (j < coding_category_max
8237              && changed[coding_priorities[j]])
8238         j++;
8239       if (j == coding_category_max)
8240         abort ();
8241       priorities[i] = coding_priorities[j];
8242     }
8243
8244   bcopy (priorities, coding_priorities, sizeof priorities);
8245
8246   /* Update `coding-category-list'.  */
8247   Vcoding_category_list = Qnil;
8248   for (i = coding_category_max - 1; i >= 0; i--)
8249     Vcoding_category_list
8250       = Fcons (AREF (Vcoding_category_table, priorities[i]),
8251                Vcoding_category_list);
8252
8253   return Qnil;
8254 }
8255
8256 DEFUN ("coding-system-priority-list", Fcoding_system_priority_list,
8257        Scoding_system_priority_list, 0, 1, 0,
8258        doc: /* Return a list of coding systems ordered by their priorities.
8259 HIGHESTP non-nil means just return the highest priority one.  */)
8260      (highestp)
8261      Lisp_Object highestp;
8262 {
8263   int i;
8264   Lisp_Object val;
8265
8266   for (i = 0, val = Qnil; i < coding_category_max; i++)
8267     {
8268       enum coding_category category = coding_priorities[i];
8269       int id = coding_categories[category].id;
8270       Lisp_Object attrs;
8271
8272       if (id < 0)
8273         continue;
8274       attrs = CODING_ID_ATTRS (id);
8275       if (! NILP (highestp))
8276         return CODING_ATTR_BASE_NAME (attrs);
8277       val = Fcons (CODING_ATTR_BASE_NAME (attrs), val);
8278     }
8279   return Fnreverse (val);
8280 }
8281
8282 static char *suffixes[] = { "-unix", "-dos", "-mac" };
8283
8284 static Lisp_Object
8285 make_subsidiaries (base)
8286      Lisp_Object base;
8287 {
8288   Lisp_Object subsidiaries;
8289   int base_name_len = SBYTES (SYMBOL_NAME (base));
8290   char *buf = (char *) alloca (base_name_len + 6);
8291   int i;
8292
8293   bcopy (SDATA (SYMBOL_NAME (base)), buf, base_name_len);
8294   subsidiaries = Fmake_vector (make_number (3), Qnil);
8295   for (i = 0; i < 3; i++)
8296     {
8297       bcopy (suffixes[i], buf + base_name_len, strlen (suffixes[i]) + 1);
8298       ASET (subsidiaries, i, intern (buf));
8299     }
8300   return subsidiaries;
8301 }
8302
8303
8304 DEFUN ("define-coding-system-internal", Fdefine_coding_system_internal,
8305        Sdefine_coding_system_internal, coding_arg_max, MANY, 0,
8306        doc: /* For internal use only.
8307 usage: (define-coding-system-internal ...)  */)
8308      (nargs, args)
8309      int nargs;
8310      Lisp_Object *args;
8311 {
8312   Lisp_Object name;
8313   Lisp_Object spec_vec;         /* [ ATTRS ALIASE EOL_TYPE ] */
8314   Lisp_Object attrs;            /* Vector of attributes.  */
8315   Lisp_Object eol_type;
8316   Lisp_Object aliases;
8317   Lisp_Object coding_type, charset_list, safe_charsets;
8318   enum coding_category category;
8319   Lisp_Object tail, val;
8320   int max_charset_id = 0;
8321   int i;
8322
8323   if (nargs < coding_arg_max)
8324     goto short_args;
8325
8326   attrs = Fmake_vector (make_number (coding_attr_last_index), Qnil);
8327
8328   name = args[coding_arg_name];
8329   CHECK_SYMBOL (name);
8330   CODING_ATTR_BASE_NAME (attrs) = name;
8331
8332   val = args[coding_arg_mnemonic];
8333   if (! STRINGP (val))
8334     CHECK_CHARACTER (val);
8335   CODING_ATTR_MNEMONIC (attrs) = val;
8336
8337   coding_type = args[coding_arg_coding_type];
8338   CHECK_SYMBOL (coding_type);
8339   CODING_ATTR_TYPE (attrs) = coding_type;
8340
8341   charset_list = args[coding_arg_charset_list];
8342   if (SYMBOLP (charset_list))
8343     {
8344       if (EQ (charset_list, Qiso_2022))
8345         {
8346           if (! EQ (coding_type, Qiso_2022))
8347             error ("Invalid charset-list");
8348           charset_list = Viso_2022_charset_list;
8349         }
8350       else if (EQ (charset_list, Qemacs_mule))
8351         {
8352           if (! EQ (coding_type, Qemacs_mule))
8353             error ("Invalid charset-list");
8354           charset_list = Vemacs_mule_charset_list;
8355         }
8356       for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
8357         if (max_charset_id < XFASTINT (XCAR (tail)))
8358           max_charset_id = XFASTINT (XCAR (tail));
8359     }
8360   else
8361     {
8362       charset_list = Fcopy_sequence (charset_list);
8363       for (tail = charset_list; !NILP (tail); tail = Fcdr (tail))
8364         {
8365           struct charset *charset;
8366
8367           val = Fcar (tail);
8368           CHECK_CHARSET_GET_CHARSET (val, charset);
8369           if (EQ (coding_type, Qiso_2022)
8370               ? CHARSET_ISO_FINAL (charset) < 0
8371               : EQ (coding_type, Qemacs_mule)
8372               ? CHARSET_EMACS_MULE_ID (charset) < 0
8373               : 0)
8374             error ("Can't handle charset `%s'",
8375                    SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
8376
8377           XSETCAR (tail, make_number (charset->id));
8378           if (max_charset_id < charset->id)
8379             max_charset_id = charset->id;
8380         }
8381     }
8382   CODING_ATTR_CHARSET_LIST (attrs) = charset_list;
8383
8384   safe_charsets = Fmake_string (make_number (max_charset_id + 1),
8385                                 make_number (255));
8386   for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
8387     SSET (safe_charsets, XFASTINT (XCAR (tail)), 0);
8388   CODING_ATTR_SAFE_CHARSETS (attrs) = safe_charsets;
8389
8390   CODING_ATTR_ASCII_COMPAT (attrs) = args[coding_arg_ascii_compatible_p];
8391
8392   val = args[coding_arg_decode_translation_table];
8393   if (! CHAR_TABLE_P (val) && ! CONSP (val))
8394     CHECK_SYMBOL (val);
8395   CODING_ATTR_DECODE_TBL (attrs) = val;
8396
8397   val = args[coding_arg_encode_translation_table];
8398   if (! CHAR_TABLE_P (val) && ! CONSP (val))
8399     CHECK_SYMBOL (val);
8400   CODING_ATTR_ENCODE_TBL (attrs) = val;
8401
8402   val = args[coding_arg_post_read_conversion];
8403   CHECK_SYMBOL (val);
8404   CODING_ATTR_POST_READ (attrs) = val;
8405
8406   val = args[coding_arg_pre_write_conversion];
8407   CHECK_SYMBOL (val);
8408   CODING_ATTR_PRE_WRITE (attrs) = val;
8409
8410   val = args[coding_arg_default_char];
8411   if (NILP (val))
8412     CODING_ATTR_DEFAULT_CHAR (attrs) = make_number (' ');
8413   else
8414     {
8415       CHECK_CHARACTER (val);
8416       CODING_ATTR_DEFAULT_CHAR (attrs) = val;
8417     }
8418
8419   val = args[coding_arg_for_unibyte];
8420   CODING_ATTR_FOR_UNIBYTE (attrs) = NILP (val) ? Qnil : Qt;
8421
8422   val = args[coding_arg_plist];
8423   CHECK_LIST (val);
8424   CODING_ATTR_PLIST (attrs) = val;
8425
8426   if (EQ (coding_type, Qcharset))
8427     {
8428       /* Generate a lisp vector of 256 elements.  Each element is nil,
8429          integer, or a list of charset IDs.
8430
8431          If Nth element is nil, the byte code N is invalid in this
8432          coding system.
8433
8434          If Nth element is a number NUM, N is the first byte of a
8435          charset whose ID is NUM.
8436
8437          If Nth element is a list of charset IDs, N is the first byte
8438          of one of them.  The list is sorted by dimensions of the
8439          charsets.  A charset of smaller dimension comes firtst. */
8440       val = Fmake_vector (make_number (256), Qnil);
8441
8442       for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
8443         {
8444           struct charset *charset = CHARSET_FROM_ID (XFASTINT (XCAR (tail)));
8445           int dim = CHARSET_DIMENSION (charset);
8446           int idx = (dim - 1) * 4;
8447
8448           if (CHARSET_ASCII_COMPATIBLE_P (charset))
8449             CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
8450
8451           for (i = charset->code_space[idx];
8452                i <= charset->code_space[idx + 1]; i++)
8453             {
8454               Lisp_Object tmp, tmp2;
8455               int dim2;
8456
8457               tmp = AREF (val, i);
8458               if (NILP (tmp))
8459                 tmp = XCAR (tail);
8460               else if (NUMBERP (tmp))
8461                 {
8462                   dim2 = CHARSET_DIMENSION (CHARSET_FROM_ID (XFASTINT (tmp)));
8463                   if (dim < dim2)
8464                     tmp = Fcons (XCAR (tail), Fcons (tmp, Qnil));
8465                   else
8466                     tmp = Fcons (tmp, Fcons (XCAR (tail), Qnil));
8467                 }
8468               else
8469                 {
8470                   for (tmp2 = tmp; CONSP (tmp2); tmp2 = XCDR (tmp2))
8471                     {
8472                       dim2 = CHARSET_DIMENSION (CHARSET_FROM_ID (XFASTINT (XCAR (tmp2))));
8473                       if (dim < dim2)
8474                         break;
8475                     }
8476                   if (NILP (tmp2))
8477                     tmp = nconc2 (tmp, Fcons (XCAR (tail), Qnil));
8478                   else
8479                     {
8480                       XSETCDR (tmp2, Fcons (XCAR (tmp2), XCDR (tmp2)));
8481                       XSETCAR (tmp2, XCAR (tail));
8482                     }
8483                 }
8484               ASET (val, i, tmp);
8485             }
8486         }
8487       ASET (attrs, coding_attr_charset_valids, val);
8488       category = coding_category_charset;
8489     }
8490   else if (EQ (coding_type, Qccl))
8491     {
8492       Lisp_Object valids;
8493
8494       if (nargs < coding_arg_ccl_max)
8495         goto short_args;
8496
8497       val = args[coding_arg_ccl_decoder];
8498       CHECK_CCL_PROGRAM (val);
8499       if (VECTORP (val))
8500         val = Fcopy_sequence (val);
8501       ASET (attrs, coding_attr_ccl_decoder, val);
8502
8503       val = args[coding_arg_ccl_encoder];
8504       CHECK_CCL_PROGRAM (val);
8505       if (VECTORP (val))
8506         val = Fcopy_sequence (val);
8507       ASET (attrs, coding_attr_ccl_encoder, val);
8508
8509       val = args[coding_arg_ccl_valids];
8510       valids = Fmake_string (make_number (256), make_number (0));
8511       for (tail = val; !NILP (tail); tail = Fcdr (tail))
8512         {
8513           int from, to;
8514
8515           val = Fcar (tail);
8516           if (INTEGERP (val))
8517             {
8518               from = to = XINT (val);
8519               if (from < 0 || from > 255)
8520                 args_out_of_range_3 (val, make_number (0), make_number (255));
8521             }
8522           else
8523             {
8524               CHECK_CONS (val);
8525               CHECK_NATNUM_CAR (val);
8526               CHECK_NATNUM_CDR (val);
8527               from = XINT (XCAR (val));
8528               if (from > 255)
8529                 args_out_of_range_3 (XCAR (val),
8530                                      make_number (0), make_number (255));
8531               to = XINT (XCDR (val));
8532               if (to < from || to > 255)
8533                 args_out_of_range_3 (XCDR (val),
8534                                      XCAR (val), make_number (255));
8535             }
8536           for (i = from; i <= to; i++)
8537             SSET (valids, i, 1);
8538         }
8539       ASET (attrs, coding_attr_ccl_valids, valids);
8540
8541       category = coding_category_ccl;
8542     }
8543   else if (EQ (coding_type, Qutf_16))
8544     {
8545       Lisp_Object bom, endian;
8546
8547       CODING_ATTR_ASCII_COMPAT (attrs) = Qnil;
8548
8549       if (nargs < coding_arg_utf16_max)
8550         goto short_args;
8551
8552       bom = args[coding_arg_utf16_bom];
8553       if (! NILP (bom) && ! EQ (bom, Qt))
8554         {
8555           CHECK_CONS (bom);
8556           val = XCAR (bom);
8557           CHECK_CODING_SYSTEM (val);
8558           val = XCDR (bom);
8559           CHECK_CODING_SYSTEM (val);
8560         }
8561       ASET (attrs, coding_attr_utf_16_bom, bom);
8562
8563       endian = args[coding_arg_utf16_endian];
8564       CHECK_SYMBOL (endian);
8565       if (NILP (endian))
8566         endian = Qbig;
8567       else if (! EQ (endian, Qbig) && ! EQ (endian, Qlittle))
8568         error ("Invalid endian: %s", SDATA (SYMBOL_NAME (endian)));
8569       ASET (attrs, coding_attr_utf_16_endian, endian);
8570
8571       category = (CONSP (bom)
8572                   ? coding_category_utf_16_auto
8573                   : NILP (bom)
8574                   ? (EQ (endian, Qbig)
8575                      ? coding_category_utf_16_be_nosig
8576                      : coding_category_utf_16_le_nosig)
8577                   : (EQ (endian, Qbig)
8578                      ? coding_category_utf_16_be
8579                      : coding_category_utf_16_le));
8580     }
8581   else if (EQ (coding_type, Qiso_2022))
8582     {
8583       Lisp_Object initial, reg_usage, request, flags;
8584       int i;
8585
8586       if (nargs < coding_arg_iso2022_max)
8587         goto short_args;
8588
8589       initial = Fcopy_sequence (args[coding_arg_iso2022_initial]);
8590       CHECK_VECTOR (initial);
8591       for (i = 0; i < 4; i++)
8592         {
8593           val = Faref (initial, make_number (i));
8594           if (! NILP (val))
8595             {
8596               struct charset *charset;
8597
8598               CHECK_CHARSET_GET_CHARSET (val, charset);
8599               ASET (initial, i, make_number (CHARSET_ID (charset)));
8600               if (i == 0 && CHARSET_ASCII_COMPATIBLE_P (charset))
8601                 CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
8602             }
8603           else
8604             ASET (initial, i, make_number (-1));
8605         }
8606
8607       reg_usage = args[coding_arg_iso2022_reg_usage];
8608       CHECK_CONS (reg_usage);
8609       CHECK_NUMBER_CAR (reg_usage);
8610       CHECK_NUMBER_CDR (reg_usage);
8611
8612       request = Fcopy_sequence (args[coding_arg_iso2022_request]);
8613       for (tail = request; ! NILP (tail); tail = Fcdr (tail))
8614         {
8615           int id;
8616           Lisp_Object tmp;
8617
8618           val = Fcar (tail);
8619           CHECK_CONS (val);
8620           tmp = XCAR (val);
8621           CHECK_CHARSET_GET_ID (tmp, id);
8622           CHECK_NATNUM_CDR (val);
8623           if (XINT (XCDR (val)) >= 4)
8624             error ("Invalid graphic register number: %d", XINT (XCDR (val)));
8625           XSETCAR (val, make_number (id));
8626         }
8627
8628       flags = args[coding_arg_iso2022_flags];
8629       CHECK_NATNUM (flags);
8630       i = XINT (flags);
8631       if (EQ (args[coding_arg_charset_list], Qiso_2022))
8632         flags = make_number (i | CODING_ISO_FLAG_FULL_SUPPORT);
8633
8634       ASET (attrs, coding_attr_iso_initial, initial);
8635       ASET (attrs, coding_attr_iso_usage, reg_usage);
8636       ASET (attrs, coding_attr_iso_request, request);
8637       ASET (attrs, coding_attr_iso_flags, flags);
8638       setup_iso_safe_charsets (attrs);
8639
8640       if (i & CODING_ISO_FLAG_SEVEN_BITS)
8641         category = ((i & (CODING_ISO_FLAG_LOCKING_SHIFT
8642                           | CODING_ISO_FLAG_SINGLE_SHIFT))
8643                     ? coding_category_iso_7_else
8644                     : EQ (args[coding_arg_charset_list], Qiso_2022)
8645                     ? coding_category_iso_7
8646                     : coding_category_iso_7_tight);
8647       else
8648         {
8649           int id = XINT (AREF (initial, 1));
8650
8651           category = (((i & CODING_ISO_FLAG_LOCKING_SHIFT)
8652                        || EQ (args[coding_arg_charset_list], Qiso_2022)
8653                        || id < 0)
8654                       ? coding_category_iso_8_else
8655                       : (CHARSET_DIMENSION (CHARSET_FROM_ID (id)) == 1)
8656                       ? coding_category_iso_8_1
8657                       : coding_category_iso_8_2);
8658         }
8659       if (category != coding_category_iso_8_1
8660           && category != coding_category_iso_8_2)
8661         CODING_ATTR_ASCII_COMPAT (attrs) = Qnil;
8662     }
8663   else if (EQ (coding_type, Qemacs_mule))
8664     {
8665       if (EQ (args[coding_arg_charset_list], Qemacs_mule))
8666         ASET (attrs, coding_attr_emacs_mule_full, Qt);
8667       CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
8668       category = coding_category_emacs_mule;
8669     }
8670   else if (EQ (coding_type, Qshift_jis))
8671     {
8672
8673       struct charset *charset;
8674
8675       if (XINT (Flength (charset_list)) != 3
8676           && XINT (Flength (charset_list)) != 4)
8677         error ("There should be three or four charsets");
8678
8679       charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
8680       if (CHARSET_DIMENSION (charset) != 1)
8681         error ("Dimension of charset %s is not one",
8682                SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
8683       if (CHARSET_ASCII_COMPATIBLE_P (charset))
8684         CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
8685
8686       charset_list = XCDR (charset_list);
8687       charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
8688       if (CHARSET_DIMENSION (charset) != 1)
8689         error ("Dimension of charset %s is not one",
8690                SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
8691
8692       charset_list = XCDR (charset_list);
8693       charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
8694       if (CHARSET_DIMENSION (charset) != 2)
8695         error ("Dimension of charset %s is not two",
8696                SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
8697
8698       charset_list = XCDR (charset_list);
8699       if (! NILP (charset_list))
8700         {
8701           charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
8702           if (CHARSET_DIMENSION (charset) != 2)
8703             error ("Dimension of charset %s is not two",
8704                    SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
8705         }
8706
8707       category = coding_category_sjis;
8708       Vsjis_coding_system = name;
8709     }
8710   else if (EQ (coding_type, Qbig5))
8711     {
8712       struct charset *charset;
8713
8714       if (XINT (Flength (charset_list)) != 2)
8715         error ("There should be just two charsets");
8716
8717       charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
8718       if (CHARSET_DIMENSION (charset) != 1)
8719         error ("Dimension of charset %s is not one",
8720                SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
8721       if (CHARSET_ASCII_COMPATIBLE_P (charset))
8722         CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
8723
8724       charset_list = XCDR (charset_list);
8725       charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
8726       if (CHARSET_DIMENSION (charset) != 2)
8727         error ("Dimension of charset %s is not two",
8728                SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
8729
8730       category = coding_category_big5;
8731       Vbig5_coding_system = name;
8732     }
8733   else if (EQ (coding_type, Qraw_text))
8734     {
8735       category = coding_category_raw_text;
8736       CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
8737     }
8738   else if (EQ (coding_type, Qutf_8))
8739     {
8740       category = coding_category_utf_8;
8741       CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
8742     }
8743   else if (EQ (coding_type, Qundecided))
8744     category = coding_category_undecided;
8745   else
8746     error ("Invalid coding system type: %s",
8747            SDATA (SYMBOL_NAME (coding_type)));
8748
8749   CODING_ATTR_CATEGORY (attrs) = make_number (category);
8750   CODING_ATTR_PLIST (attrs)
8751     = Fcons (QCcategory, Fcons (AREF (Vcoding_category_table, category),
8752                                 CODING_ATTR_PLIST (attrs)));
8753
8754   eol_type = args[coding_arg_eol_type];
8755   if (! NILP (eol_type)
8756       && ! EQ (eol_type, Qunix)
8757       && ! EQ (eol_type, Qdos)
8758       && ! EQ (eol_type, Qmac))
8759     error ("Invalid eol-type");
8760
8761   aliases = Fcons (name, Qnil);
8762
8763   if (NILP (eol_type))
8764     {
8765       eol_type = make_subsidiaries (name);
8766       for (i = 0; i < 3; i++)
8767         {
8768           Lisp_Object this_spec, this_name, this_aliases, this_eol_type;
8769
8770           this_name = AREF (eol_type, i);
8771           this_aliases = Fcons (this_name, Qnil);
8772           this_eol_type = (i == 0 ? Qunix : i == 1 ? Qdos : Qmac);
8773           this_spec = Fmake_vector (make_number (3), attrs);
8774           ASET (this_spec, 1, this_aliases);
8775           ASET (this_spec, 2, this_eol_type);
8776           Fputhash (this_name, this_spec, Vcoding_system_hash_table);
8777           Vcoding_system_list = Fcons (this_name, Vcoding_system_list);
8778           Vcoding_system_alist = Fcons (Fcons (Fsymbol_name (this_name), Qnil),
8779                                         Vcoding_system_alist);
8780         }
8781     }
8782
8783   spec_vec = Fmake_vector (make_number (3), attrs);
8784   ASET (spec_vec, 1, aliases);
8785   ASET (spec_vec, 2, eol_type);
8786
8787   Fputhash (name, spec_vec, Vcoding_system_hash_table);
8788   Vcoding_system_list = Fcons (name, Vcoding_system_list);
8789   Vcoding_system_alist = Fcons (Fcons (Fsymbol_name (name), Qnil),
8790                                 Vcoding_system_alist);
8791
8792   {
8793     int id = coding_categories[category].id;
8794
8795     if (id < 0 || EQ (name, CODING_ID_NAME (id)))
8796       setup_coding_system (name, &coding_categories[category]);
8797   }
8798
8799   return Qnil;
8800
8801  short_args:
8802   return Fsignal (Qwrong_number_of_arguments,
8803                   Fcons (intern ("define-coding-system-internal"),
8804                          make_number (nargs)));
8805 }
8806
8807
8808 DEFUN ("coding-system-put", Fcoding_system_put, Scoding_system_put,
8809        3, 3, 0,
8810        doc: /* Change value in CODING-SYSTEM's property list PROP to VAL.  */)
8811   (coding_system, prop, val)
8812      Lisp_Object coding_system, prop, val;
8813 {
8814   Lisp_Object spec, attrs, plist;
8815
8816   CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
8817   attrs = AREF (spec, 0);
8818   if (EQ (prop, QCmnemonic))
8819     {
8820       if (! STRINGP (val))
8821         CHECK_CHARACTER (val);
8822       CODING_ATTR_MNEMONIC (attrs) = val;
8823     }
8824   else if (EQ (prop, QCdefalut_char))
8825     {
8826       if (NILP (val))
8827         val = make_number (' ');
8828       else
8829         CHECK_CHARACTER (val);
8830       CODING_ATTR_DEFAULT_CHAR (attrs) = val;
8831     }
8832   else if (EQ (prop, QCdecode_translation_table))
8833     {
8834       if (! CHAR_TABLE_P (val) && ! CONSP (val))
8835         CHECK_SYMBOL (val);
8836       CODING_ATTR_DECODE_TBL (attrs) = val;
8837     }
8838   else if (EQ (prop, QCencode_translation_table))
8839     {
8840       if (! CHAR_TABLE_P (val) && ! CONSP (val))
8841         CHECK_SYMBOL (val);
8842       CODING_ATTR_ENCODE_TBL (attrs) = val;
8843     }
8844   else if (EQ (prop, QCpost_read_conversion))
8845     {
8846       CHECK_SYMBOL (val);
8847       CODING_ATTR_POST_READ (attrs) = val;
8848     }
8849   else if (EQ (prop, QCpre_write_conversion))
8850     {
8851       CHECK_SYMBOL (val);
8852       CODING_ATTR_PRE_WRITE (attrs) = val;
8853     }
8854
8855   CODING_ATTR_PLIST (attrs)
8856     = Fplist_put (CODING_ATTR_PLIST (attrs), prop, val);
8857   return val;
8858 }
8859
8860
8861 DEFUN ("define-coding-system-alias", Fdefine_coding_system_alias,
8862        Sdefine_coding_system_alias, 2, 2, 0,
8863        doc: /* Define ALIAS as an alias for CODING-SYSTEM.  */)
8864      (alias, coding_system)
8865      Lisp_Object alias, coding_system;
8866 {
8867   Lisp_Object spec, aliases, eol_type;
8868
8869   CHECK_SYMBOL (alias);
8870   CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
8871   aliases = AREF (spec, 1);
8872   /* ALISES should be a list of length more than zero, and the first
8873      element is a base coding system.  Append ALIAS at the tail of the
8874      list.  */
8875   while (!NILP (XCDR (aliases)))
8876     aliases = XCDR (aliases);
8877   XSETCDR (aliases, Fcons (alias, Qnil));
8878
8879   eol_type = AREF (spec, 2);
8880   if (VECTORP (eol_type))
8881     {
8882       Lisp_Object subsidiaries;
8883       int i;
8884
8885       subsidiaries = make_subsidiaries (alias);
8886       for (i = 0; i < 3; i++)
8887         Fdefine_coding_system_alias (AREF (subsidiaries, i),
8888                                      AREF (eol_type, i));
8889     }
8890
8891   Fputhash (alias, spec, Vcoding_system_hash_table);
8892   Vcoding_system_list = Fcons (alias, Vcoding_system_list);
8893   Vcoding_system_alist = Fcons (Fcons (Fsymbol_name (alias), Qnil),
8894                                 Vcoding_system_alist);
8895
8896   return Qnil;
8897 }
8898
8899 DEFUN ("coding-system-base", Fcoding_system_base, Scoding_system_base,
8900        1, 1, 0,
8901        doc: /* Return the base of CODING-SYSTEM.
8902 Any alias or subsidiary coding system is not a base coding system.  */)
8903   (coding_system)
8904      Lisp_Object coding_system;
8905 {
8906   Lisp_Object spec, attrs;
8907
8908   if (NILP (coding_system))
8909     return (Qno_conversion);
8910   CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
8911   attrs = AREF (spec, 0);
8912   return CODING_ATTR_BASE_NAME (attrs);
8913 }
8914
8915 DEFUN ("coding-system-plist", Fcoding_system_plist, Scoding_system_plist,
8916        1, 1, 0,
8917        doc: "Return the property list of CODING-SYSTEM.")
8918      (coding_system)
8919      Lisp_Object coding_system;
8920 {
8921   Lisp_Object spec, attrs;
8922
8923   if (NILP (coding_system))
8924     coding_system = Qno_conversion;
8925   CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
8926   attrs = AREF (spec, 0);
8927   return CODING_ATTR_PLIST (attrs);
8928 }
8929
8930
8931 DEFUN ("coding-system-aliases", Fcoding_system_aliases, Scoding_system_aliases,
8932        1, 1, 0,
8933        doc: /* Return the list of aliases of CODING-SYSTEM.  */)
8934      (coding_system)
8935      Lisp_Object coding_system;
8936 {
8937   Lisp_Object spec;
8938
8939   if (NILP (coding_system))
8940     coding_system = Qno_conversion;
8941   CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
8942   return AREF (spec, 1);
8943 }
8944
8945 DEFUN ("coding-system-eol-type", Fcoding_system_eol_type,
8946        Scoding_system_eol_type, 1, 1, 0,
8947        doc: /* Return eol-type of CODING-SYSTEM.
8948 An eol-type is integer 0, 1, 2, or a vector of coding systems.
8949
8950 Integer values 0, 1, and 2 indicate a format of end-of-line; LF, CRLF,
8951 and CR respectively.
8952
8953 A vector value indicates that a format of end-of-line should be
8954 detected automatically.  Nth element of the vector is the subsidiary
8955 coding system whose eol-type is N.  */)
8956      (coding_system)
8957      Lisp_Object coding_system;
8958 {
8959   Lisp_Object spec, eol_type;
8960   int n;
8961
8962   if (NILP (coding_system))
8963     coding_system = Qno_conversion;
8964   if (! CODING_SYSTEM_P (coding_system))
8965     return Qnil;
8966   spec = CODING_SYSTEM_SPEC (coding_system);
8967   eol_type = AREF (spec, 2);
8968   if (VECTORP (eol_type))
8969     return Fcopy_sequence (eol_type);
8970   n = EQ (eol_type, Qunix) ? 0 : EQ (eol_type, Qdos) ? 1 : 2;
8971   return make_number (n);
8972 }
8973
8974 #endif /* emacs */
8975
8976 \f
8977 /*** 9. Post-amble ***/
8978
8979 void
8980 init_coding_once ()
8981 {
8982   int i;
8983
8984   for (i = 0; i < coding_category_max; i++)
8985     {
8986       coding_categories[i].id = -1;
8987       coding_priorities[i] = i;
8988     }
8989
8990   /* ISO2022 specific initialize routine.  */
8991   for (i = 0; i < 0x20; i++)
8992     iso_code_class[i] = ISO_control_0;
8993   for (i = 0x21; i < 0x7F; i++)
8994     iso_code_class[i] = ISO_graphic_plane_0;
8995   for (i = 0x80; i < 0xA0; i++)
8996     iso_code_class[i] = ISO_control_1;
8997   for (i = 0xA1; i < 0xFF; i++)
8998     iso_code_class[i] = ISO_graphic_plane_1;
8999   iso_code_class[0x20] = iso_code_class[0x7F] = ISO_0x20_or_0x7F;
9000   iso_code_class[0xA0] = iso_code_class[0xFF] = ISO_0xA0_or_0xFF;
9001   iso_code_class[ISO_CODE_SO] = ISO_shift_out;
9002   iso_code_class[ISO_CODE_SI] = ISO_shift_in;
9003   iso_code_class[ISO_CODE_SS2_7] = ISO_single_shift_2_7;
9004   iso_code_class[ISO_CODE_ESC] = ISO_escape;
9005   iso_code_class[ISO_CODE_SS2] = ISO_single_shift_2;
9006   iso_code_class[ISO_CODE_SS3] = ISO_single_shift_3;
9007   iso_code_class[ISO_CODE_CSI] = ISO_control_sequence_introducer;
9008
9009   for (i = 0; i < 256; i++)
9010     {
9011       emacs_mule_bytes[i] = 1;
9012     }
9013   emacs_mule_bytes[EMACS_MULE_LEADING_CODE_PRIVATE_11] = 3;
9014   emacs_mule_bytes[EMACS_MULE_LEADING_CODE_PRIVATE_12] = 3;
9015   emacs_mule_bytes[EMACS_MULE_LEADING_CODE_PRIVATE_21] = 4;
9016   emacs_mule_bytes[EMACS_MULE_LEADING_CODE_PRIVATE_22] = 4;
9017 }
9018
9019 #ifdef emacs
9020
9021 void
9022 syms_of_coding ()
9023 {
9024   staticpro (&Vcoding_system_hash_table);
9025   {
9026     Lisp_Object args[2];
9027     args[0] = QCtest;
9028     args[1] = Qeq;
9029     Vcoding_system_hash_table = Fmake_hash_table (2, args);
9030   }
9031
9032   staticpro (&Vsjis_coding_system);
9033   Vsjis_coding_system = Qnil;
9034
9035   staticpro (&Vbig5_coding_system);
9036   Vbig5_coding_system = Qnil;
9037
9038   staticpro (&Vcode_conversion_reused_workbuf);
9039   Vcode_conversion_reused_workbuf = Qnil;
9040
9041   staticpro (&Vcode_conversion_workbuf_name);
9042   Vcode_conversion_workbuf_name = build_string (" *code-conversion-work*");
9043
9044   reused_workbuf_in_use = 0;
9045
9046   DEFSYM (Qcharset, "charset");
9047   DEFSYM (Qtarget_idx, "target-idx");
9048   DEFSYM (Qcoding_system_history, "coding-system-history");
9049   Fset (Qcoding_system_history, Qnil);
9050
9051   /* Target FILENAME is the first argument.  */
9052   Fput (Qinsert_file_contents, Qtarget_idx, make_number (0));
9053   /* Target FILENAME is the third argument.  */
9054   Fput (Qwrite_region, Qtarget_idx, make_number (2));
9055
9056   DEFSYM (Qcall_process, "call-process");
9057   /* Target PROGRAM is the first argument.  */
9058   Fput (Qcall_process, Qtarget_idx, make_number (0));
9059
9060   DEFSYM (Qcall_process_region, "call-process-region");
9061   /* Target PROGRAM is the third argument.  */
9062   Fput (Qcall_process_region, Qtarget_idx, make_number (2));
9063
9064   DEFSYM (Qstart_process, "start-process");
9065   /* Target PROGRAM is the third argument.  */
9066   Fput (Qstart_process, Qtarget_idx, make_number (2));
9067
9068   DEFSYM (Qopen_network_stream, "open-network-stream");
9069   /* Target SERVICE is the fourth argument.  */
9070   Fput (Qopen_network_stream, Qtarget_idx, make_number (3));
9071
9072   DEFSYM (Qcoding_system, "coding-system");
9073   DEFSYM (Qcoding_aliases, "coding-aliases");
9074
9075   DEFSYM (Qeol_type, "eol-type");
9076   DEFSYM (Qunix, "unix");
9077   DEFSYM (Qdos, "dos");
9078
9079   DEFSYM (Qbuffer_file_coding_system, "buffer-file-coding-system");
9080   DEFSYM (Qpost_read_conversion, "post-read-conversion");
9081   DEFSYM (Qpre_write_conversion, "pre-write-conversion");
9082   DEFSYM (Qdefault_char, "default-char");
9083   DEFSYM (Qundecided, "undecided");
9084   DEFSYM (Qno_conversion, "no-conversion");
9085   DEFSYM (Qraw_text, "raw-text");
9086
9087   DEFSYM (Qiso_2022, "iso-2022");
9088
9089   DEFSYM (Qutf_8, "utf-8");
9090   DEFSYM (Qutf_8_emacs, "utf-8-emacs");
9091
9092   DEFSYM (Qutf_16, "utf-16");
9093   DEFSYM (Qbig, "big");
9094   DEFSYM (Qlittle, "little");
9095
9096   DEFSYM (Qshift_jis, "shift-jis");
9097   DEFSYM (Qbig5, "big5");
9098
9099   DEFSYM (Qcoding_system_p, "coding-system-p");
9100
9101   DEFSYM (Qcoding_system_error, "coding-system-error");
9102   Fput (Qcoding_system_error, Qerror_conditions,
9103         Fcons (Qcoding_system_error, Fcons (Qerror, Qnil)));
9104   Fput (Qcoding_system_error, Qerror_message,
9105         build_string ("Invalid coding system"));
9106
9107   /* Intern this now in case it isn't already done.
9108      Setting this variable twice is harmless.
9109      But don't staticpro it here--that is done in alloc.c.  */
9110   Qchar_table_extra_slots = intern ("char-table-extra-slots");
9111
9112   DEFSYM (Qtranslation_table, "translation-table");
9113   Fput (Qtranslation_table, Qchar_table_extra_slots, make_number (2));
9114   DEFSYM (Qtranslation_table_id, "translation-table-id");
9115   DEFSYM (Qtranslation_table_for_decode, "translation-table-for-decode");
9116   DEFSYM (Qtranslation_table_for_encode, "translation-table-for-encode");
9117
9118   DEFSYM (Qvalid_codes, "valid-codes");
9119
9120   DEFSYM (Qemacs_mule, "emacs-mule");
9121
9122   DEFSYM (QCcategory, ":category");
9123   DEFSYM (QCmnemonic, ":mnemonic");
9124   DEFSYM (QCdefalut_char, ":default-char");
9125   DEFSYM (QCdecode_translation_table, ":decode-translation-table");
9126   DEFSYM (QCencode_translation_table, ":encode-translation-table");
9127   DEFSYM (QCpost_read_conversion, ":post-read-conversion");
9128   DEFSYM (QCpre_write_conversion, ":pre-write-conversion");
9129
9130   Vcoding_category_table
9131     = Fmake_vector (make_number (coding_category_max), Qnil);
9132   staticpro (&Vcoding_category_table);
9133   /* Followings are target of code detection.  */
9134   ASET (Vcoding_category_table, coding_category_iso_7,
9135         intern ("coding-category-iso-7"));
9136   ASET (Vcoding_category_table, coding_category_iso_7_tight,
9137         intern ("coding-category-iso-7-tight"));
9138   ASET (Vcoding_category_table, coding_category_iso_8_1,
9139         intern ("coding-category-iso-8-1"));
9140   ASET (Vcoding_category_table, coding_category_iso_8_2,
9141         intern ("coding-category-iso-8-2"));
9142   ASET (Vcoding_category_table, coding_category_iso_7_else,
9143         intern ("coding-category-iso-7-else"));
9144   ASET (Vcoding_category_table, coding_category_iso_8_else,
9145         intern ("coding-category-iso-8-else"));
9146   ASET (Vcoding_category_table, coding_category_utf_8,
9147         intern ("coding-category-utf-8"));
9148   ASET (Vcoding_category_table, coding_category_utf_16_be,
9149         intern ("coding-category-utf-16-be"));
9150   ASET (Vcoding_category_table, coding_category_utf_16_auto,
9151         intern ("coding-category-utf-16-auto"));
9152   ASET (Vcoding_category_table, coding_category_utf_16_le,
9153         intern ("coding-category-utf-16-le"));
9154   ASET (Vcoding_category_table, coding_category_utf_16_be_nosig,
9155         intern ("coding-category-utf-16-be-nosig"));
9156   ASET (Vcoding_category_table, coding_category_utf_16_le_nosig,
9157         intern ("coding-category-utf-16-le-nosig"));
9158   ASET (Vcoding_category_table, coding_category_charset,
9159         intern ("coding-category-charset"));
9160   ASET (Vcoding_category_table, coding_category_sjis,
9161         intern ("coding-category-sjis"));
9162   ASET (Vcoding_category_table, coding_category_big5,
9163         intern ("coding-category-big5"));
9164   ASET (Vcoding_category_table, coding_category_ccl,
9165         intern ("coding-category-ccl"));
9166   ASET (Vcoding_category_table, coding_category_emacs_mule,
9167         intern ("coding-category-emacs-mule"));
9168   /* Followings are NOT target of code detection.  */
9169   ASET (Vcoding_category_table, coding_category_raw_text,
9170         intern ("coding-category-raw-text"));
9171   ASET (Vcoding_category_table, coding_category_undecided,
9172         intern ("coding-category-undecided"));
9173
9174   DEFSYM (Qinsufficient_source, "insufficient-source");
9175   DEFSYM (Qinconsistent_eol, "inconsistent-eol");
9176   DEFSYM (Qinvalid_source, "invalid-source");
9177   DEFSYM (Qinterrupted, "interrupted");
9178   DEFSYM (Qinsufficient_memory, "insufficient-memory");
9179
9180   defsubr (&Scoding_system_p);
9181   defsubr (&Sread_coding_system);
9182   defsubr (&Sread_non_nil_coding_system);
9183   defsubr (&Scheck_coding_system);
9184   defsubr (&Sdetect_coding_region);
9185   defsubr (&Sdetect_coding_string);
9186   defsubr (&Sfind_coding_systems_region_internal);
9187   defsubr (&Sunencodable_char_position);
9188   defsubr (&Scheck_coding_systems_region);
9189   defsubr (&Sdecode_coding_region);
9190   defsubr (&Sencode_coding_region);
9191   defsubr (&Sdecode_coding_string);
9192   defsubr (&Sencode_coding_string);
9193   defsubr (&Sdecode_sjis_char);
9194   defsubr (&Sencode_sjis_char);
9195   defsubr (&Sdecode_big5_char);
9196   defsubr (&Sencode_big5_char);
9197   defsubr (&Sset_terminal_coding_system_internal);
9198   defsubr (&Sset_safe_terminal_coding_system_internal);
9199   defsubr (&Sterminal_coding_system);
9200   defsubr (&Sset_keyboard_coding_system_internal);
9201   defsubr (&Skeyboard_coding_system);
9202   defsubr (&Sfind_operation_coding_system);
9203   defsubr (&Sset_coding_system_priority);
9204   defsubr (&Sdefine_coding_system_internal);
9205   defsubr (&Sdefine_coding_system_alias);
9206   defsubr (&Scoding_system_put);
9207   defsubr (&Scoding_system_base);
9208   defsubr (&Scoding_system_plist);
9209   defsubr (&Scoding_system_aliases);
9210   defsubr (&Scoding_system_eol_type);
9211   defsubr (&Scoding_system_priority_list);
9212
9213   DEFVAR_LISP ("coding-system-list", &Vcoding_system_list,
9214                doc: /* List of coding systems.
9215
9216 Do not alter the value of this variable manually.  This variable should be
9217 updated by the functions `define-coding-system' and
9218 `define-coding-system-alias'.  */);
9219   Vcoding_system_list = Qnil;
9220
9221   DEFVAR_LISP ("coding-system-alist", &Vcoding_system_alist,
9222                doc: /* Alist of coding system names.
9223 Each element is one element list of coding system name.
9224 This variable is given to `completing-read' as TABLE argument.
9225
9226 Do not alter the value of this variable manually.  This variable should be
9227 updated by the functions `make-coding-system' and
9228 `define-coding-system-alias'.  */);
9229   Vcoding_system_alist = Qnil;
9230
9231   DEFVAR_LISP ("coding-category-list", &Vcoding_category_list,
9232                doc: /* List of coding-categories (symbols) ordered by priority.
9233
9234 On detecting a coding system, Emacs tries code detection algorithms
9235 associated with each coding-category one by one in this order.  When
9236 one algorithm agrees with a byte sequence of source text, the coding
9237 system bound to the corresponding coding-category is selected.  */);
9238   {
9239     int i;
9240
9241     Vcoding_category_list = Qnil;
9242     for (i = coding_category_max - 1; i >= 0; i--)
9243       Vcoding_category_list
9244         = Fcons (XVECTOR (Vcoding_category_table)->contents[i],
9245                  Vcoding_category_list);
9246   }
9247
9248   DEFVAR_LISP ("coding-system-for-read", &Vcoding_system_for_read,
9249                doc: /* Specify the coding system for read operations.
9250 It is useful to bind this variable with `let', but do not set it globally.
9251 If the value is a coding system, it is used for decoding on read operation.
9252 If not, an appropriate element is used from one of the coding system alists:
9253 There are three such tables, `file-coding-system-alist',
9254 `process-coding-system-alist', and `network-coding-system-alist'.  */);
9255   Vcoding_system_for_read = Qnil;
9256
9257   DEFVAR_LISP ("coding-system-for-write", &Vcoding_system_for_write,
9258                doc: /* Specify the coding system for write operations.
9259 Programs bind this variable with `let', but you should not set it globally.
9260 If the value is a coding system, it is used for encoding of output,
9261 when writing it to a file and when sending it to a file or subprocess.
9262
9263 If this does not specify a coding system, an appropriate element
9264 is used from one of the coding system alists:
9265 There are three such tables, `file-coding-system-alist',
9266 `process-coding-system-alist', and `network-coding-system-alist'.
9267 For output to files, if the above procedure does not specify a coding system,
9268 the value of `buffer-file-coding-system' is used.  */);
9269   Vcoding_system_for_write = Qnil;
9270
9271   DEFVAR_LISP ("last-coding-system-used", &Vlast_coding_system_used,
9272                doc: /*
9273 Coding system used in the latest file or process I/O.  */);
9274   Vlast_coding_system_used = Qnil;
9275
9276   DEFVAR_LISP ("last-code-conversion-error", &Vlast_code_conversion_error,
9277                doc: /*
9278 Error status of the last code conversion.
9279
9280 When an error was detected in the last code conversion, this variable
9281 is set to one of the following symbols.
9282   `insufficient-source'
9283   `inconsistent-eol'
9284   `invalid-source'
9285   `interrupted'
9286   `insufficient-memory'
9287 When no error was detected, the value doesn't change.  So, to check
9288 the error status of a code conversion by this variable, you must
9289 explicitly set this variable to nil before performing code
9290 conversion.  */);
9291   Vlast_code_conversion_error = Qnil;
9292
9293   DEFVAR_BOOL ("inhibit-eol-conversion", &inhibit_eol_conversion,
9294                doc: /*
9295 *Non-nil means always inhibit code conversion of end-of-line format.
9296 See info node `Coding Systems' and info node `Text and Binary' concerning
9297 such conversion.  */);
9298   inhibit_eol_conversion = 0;
9299
9300   DEFVAR_BOOL ("inherit-process-coding-system", &inherit_process_coding_system,
9301                doc: /*
9302 Non-nil means process buffer inherits coding system of process output.
9303 Bind it to t if the process output is to be treated as if it were a file
9304 read from some filesystem.  */);
9305   inherit_process_coding_system = 0;
9306
9307   DEFVAR_LISP ("file-coding-system-alist", &Vfile_coding_system_alist,
9308                doc: /*
9309 Alist to decide a coding system to use for a file I/O operation.
9310 The format is ((PATTERN . VAL) ...),
9311 where PATTERN is a regular expression matching a file name,
9312 VAL is a coding system, a cons of coding systems, or a function symbol.
9313 If VAL is a coding system, it is used for both decoding and encoding
9314 the file contents.
9315 If VAL is a cons of coding systems, the car part is used for decoding,
9316 and the cdr part is used for encoding.
9317 If VAL is a function symbol, the function must return a coding system
9318 or a cons of coding systems which are used as above.  The function gets
9319 the arguments with which `find-operation-coding-systems' was called.
9320
9321 See also the function `find-operation-coding-system'
9322 and the variable `auto-coding-alist'.  */);
9323   Vfile_coding_system_alist = Qnil;
9324
9325   DEFVAR_LISP ("process-coding-system-alist", &Vprocess_coding_system_alist,
9326                doc: /*
9327 Alist to decide a coding system to use for a process I/O operation.
9328 The format is ((PATTERN . VAL) ...),
9329 where PATTERN is a regular expression matching a program name,
9330 VAL is a coding system, a cons of coding systems, or a function symbol.
9331 If VAL is a coding system, it is used for both decoding what received
9332 from the program and encoding what sent to the program.
9333 If VAL is a cons of coding systems, the car part is used for decoding,
9334 and the cdr part is used for encoding.
9335 If VAL is a function symbol, the function must return a coding system
9336 or a cons of coding systems which are used as above.
9337
9338 See also the function `find-operation-coding-system'.  */);
9339   Vprocess_coding_system_alist = Qnil;
9340
9341   DEFVAR_LISP ("network-coding-system-alist", &Vnetwork_coding_system_alist,
9342                doc: /*
9343 Alist to decide a coding system to use for a network I/O operation.
9344 The format is ((PATTERN . VAL) ...),
9345 where PATTERN is a regular expression matching a network service name
9346 or is a port number to connect to,
9347 VAL is a coding system, a cons of coding systems, or a function symbol.
9348 If VAL is a coding system, it is used for both decoding what received
9349 from the network stream and encoding what sent to the network stream.
9350 If VAL is a cons of coding systems, the car part is used for decoding,
9351 and the cdr part is used for encoding.
9352 If VAL is a function symbol, the function must return a coding system
9353 or a cons of coding systems which are used as above.
9354
9355 See also the function `find-operation-coding-system'.  */);
9356   Vnetwork_coding_system_alist = Qnil;
9357
9358   DEFVAR_LISP ("locale-coding-system", &Vlocale_coding_system,
9359                doc: /* Coding system to use with system messages.
9360 Also used for decoding keyboard input on X Window system.  */);
9361   Vlocale_coding_system = Qnil;
9362
9363   /* The eol mnemonics are reset in startup.el system-dependently.  */
9364   DEFVAR_LISP ("eol-mnemonic-unix", &eol_mnemonic_unix,
9365                doc: /*
9366 *String displayed in mode line for UNIX-like (LF) end-of-line format.  */);
9367   eol_mnemonic_unix = build_string (":");
9368
9369   DEFVAR_LISP ("eol-mnemonic-dos", &eol_mnemonic_dos,
9370                doc: /*
9371 *String displayed in mode line for DOS-like (CRLF) end-of-line format.  */);
9372   eol_mnemonic_dos = build_string ("\\");
9373
9374   DEFVAR_LISP ("eol-mnemonic-mac", &eol_mnemonic_mac,
9375                doc: /*
9376 *String displayed in mode line for MAC-like (CR) end-of-line format.  */);
9377   eol_mnemonic_mac = build_string ("/");
9378
9379   DEFVAR_LISP ("eol-mnemonic-undecided", &eol_mnemonic_undecided,
9380                doc: /*
9381 *String displayed in mode line when end-of-line format is not yet determined.  */);
9382   eol_mnemonic_undecided = build_string (":");
9383
9384   DEFVAR_LISP ("enable-character-translation", &Venable_character_translation,
9385                doc: /*
9386 *Non-nil enables character translation while encoding and decoding.  */);
9387   Venable_character_translation = Qt;
9388
9389   DEFVAR_LISP ("standard-translation-table-for-decode",
9390                &Vstandard_translation_table_for_decode,
9391                doc: /* Table for translating characters while decoding.  */);
9392   Vstandard_translation_table_for_decode = Qnil;
9393
9394   DEFVAR_LISP ("standard-translation-table-for-encode",
9395                &Vstandard_translation_table_for_encode,
9396                doc: /* Table for translating characters while encoding.  */);
9397   Vstandard_translation_table_for_encode = Qnil;
9398
9399   DEFVAR_LISP ("charset-revision-table", &Vcharset_revision_table,
9400                doc: /* Alist of charsets vs revision numbers.
9401 While encoding, if a charset (car part of an element) is found,
9402 designate it with the escape sequence identifying revision (cdr part
9403 of the element).  */);
9404   Vcharset_revision_table = Qnil;
9405
9406   DEFVAR_LISP ("default-process-coding-system",
9407                &Vdefault_process_coding_system,
9408                doc: /* Cons of coding systems used for process I/O by default.
9409 The car part is used for decoding a process output,
9410 the cdr part is used for encoding a text to be sent to a process.  */);
9411   Vdefault_process_coding_system = Qnil;
9412
9413   DEFVAR_LISP ("latin-extra-code-table", &Vlatin_extra_code_table,
9414                doc: /*
9415 Table of extra Latin codes in the range 128..159 (inclusive).
9416 This is a vector of length 256.
9417 If Nth element is non-nil, the existence of code N in a file
9418 \(or output of subprocess) doesn't prevent it to be detected as
9419 a coding system of ISO 2022 variant which has a flag
9420 `accept-latin-extra-code' t (e.g. iso-latin-1) on reading a file
9421 or reading output of a subprocess.
9422 Only 128th through 159th elements has a meaning.  */);
9423   Vlatin_extra_code_table = Fmake_vector (make_number (256), Qnil);
9424
9425   DEFVAR_LISP ("select-safe-coding-system-function",
9426                &Vselect_safe_coding_system_function,
9427                doc: /*
9428 Function to call to select safe coding system for encoding a text.
9429
9430 If set, this function is called to force a user to select a proper
9431 coding system which can encode the text in the case that a default
9432 coding system used in each operation can't encode the text.
9433
9434 The default value is `select-safe-coding-system' (which see).  */);
9435   Vselect_safe_coding_system_function = Qnil;
9436
9437   DEFVAR_BOOL ("coding-system-require-warning",
9438                &coding_system_require_warning,
9439                doc: /* Internal use only.
9440 If non-nil, on writing a file, `select-safe-coding-system-function' is
9441 called even if `coding-system-for-write' is non-nil.  The command
9442 `universal-coding-system-argument' binds this variable to t temporarily.  */);
9443   coding_system_require_warning = 0;
9444
9445
9446   DEFVAR_BOOL ("inhibit-iso-escape-detection",
9447                &inhibit_iso_escape_detection,
9448                doc: /*
9449 If non-nil, Emacs ignores ISO2022's escape sequence on code detection.
9450
9451 By default, on reading a file, Emacs tries to detect how the text is
9452 encoded.  This code detection is sensitive to escape sequences.  If
9453 the sequence is valid as ISO2022, the code is determined as one of
9454 the ISO2022 encodings, and the file is decoded by the corresponding
9455 coding system (e.g. `iso-2022-7bit').
9456
9457 However, there may be a case that you want to read escape sequences in
9458 a file as is.  In such a case, you can set this variable to non-nil.
9459 Then, as the code detection ignores any escape sequences, no file is
9460 detected as encoded in some ISO2022 encoding.  The result is that all
9461 escape sequences become visible in a buffer.
9462
9463 The default value is nil, and it is strongly recommended not to change
9464 it.  That is because many Emacs Lisp source files that contain
9465 non-ASCII characters are encoded by the coding system `iso-2022-7bit'
9466 in Emacs's distribution, and they won't be decoded correctly on
9467 reading if you suppress escape sequence detection.
9468
9469 The other way to read escape sequences in a file without decoding is
9470 to explicitly specify some coding system that doesn't use ISO2022's
9471 escape sequence (e.g `latin-1') on reading by \\[universal-coding-system-argument].  */);
9472   inhibit_iso_escape_detection = 0;
9473
9474   DEFVAR_LISP ("translation-table-for-input", &Vtranslation_table_for_input,
9475                doc: /* Char table for translating self-inserting characters.
9476 This is applied to the result of input methods, not their input.  See also
9477 `keyboard-translate-table'.  */);
9478     Vtranslation_table_for_input = Qnil;
9479
9480   {
9481     Lisp_Object args[coding_arg_max];
9482     Lisp_Object plist[16];
9483     int i;
9484
9485     for (i = 0; i < coding_arg_max; i++)
9486       args[i] = Qnil;
9487
9488     plist[0] = intern (":name");
9489     plist[1] = args[coding_arg_name] = Qno_conversion;
9490     plist[2] = intern (":mnemonic");
9491     plist[3] = args[coding_arg_mnemonic] = make_number ('=');
9492     plist[4] = intern (":coding-type");
9493     plist[5] = args[coding_arg_coding_type] = Qraw_text;
9494     plist[6] = intern (":ascii-compatible-p");
9495     plist[7] = args[coding_arg_ascii_compatible_p] = Qt;
9496     plist[8] = intern (":default-char");
9497     plist[9] = args[coding_arg_default_char] = make_number (0);
9498     plist[10] = intern (":for-unibyte");
9499     plist[11] = args[coding_arg_for_unibyte] = Qt;
9500     plist[12] = intern (":docstring");
9501     plist[13] = build_string ("Do no conversion.\n\
9502 \n\
9503 When you visit a file with this coding, the file is read into a\n\
9504 unibyte buffer as is, thus each byte of a file is treated as a\n\
9505 character.");
9506     plist[14] = intern (":eol-type");
9507     plist[15] = args[coding_arg_eol_type] = Qunix;
9508     args[coding_arg_plist] = Flist (16, plist);
9509     Fdefine_coding_system_internal (coding_arg_max, args);
9510   }
9511
9512   setup_coding_system (Qno_conversion, &keyboard_coding);
9513   setup_coding_system (Qno_conversion, &terminal_coding);
9514   setup_coding_system (Qno_conversion, &safe_terminal_coding);
9515
9516   {
9517     int i;
9518
9519     for (i = 0; i < coding_category_max; i++)
9520       Fset (AREF (Vcoding_category_table, i), Qno_conversion);
9521   }
9522 }
9523
9524 char *
9525 emacs_strerror (error_number)
9526      int error_number;
9527 {
9528   char *str;
9529
9530   synchronize_system_messages_locale ();
9531   str = strerror (error_number);
9532
9533   if (! NILP (Vlocale_coding_system))
9534     {
9535       Lisp_Object dec = code_convert_string_norecord (build_string (str),
9536                                                       Vlocale_coding_system,
9537                                                       0);
9538       str = (char *) SDATA (dec);
9539     }
9540
9541   return str;
9542 }
9543
9544 #endif /* emacs */