src/coding.c

   1 /* Coding system handler (conversion, detection, etc).
   2    Copyright (C) 1995, 1997, 1998 Electrotechnical Laboratory, JAPAN.
   3      Licensed to the Free Software Foundation.
   4    Copyright (C) 2001, 2002 Free Software Foundation, Inc.
   5    Copyright (C) 2003
   6      National Institute of Advanced Industrial Science and Technology (AIST)
   7      Registration Number H13PRO009
   8
   9 This file is part of GNU Emacs.
  10
  11 GNU Emacs is free software; you can redistribute it and/or modify
  12 it under the terms of the GNU General Public License as published by
  13 the Free Software Foundation; either version 2, or (at your option)
  14 any later version.
  15
  16 GNU Emacs is distributed in the hope that it will be useful,
  17 but WITHOUT ANY WARRANTY; without even the implied warranty of
  18 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  19 GNU General Public License for more details.
  20
  21 You should have received a copy of the GNU General Public License
  22 along with GNU Emacs; see the file COPYING.  If not, write to
  23 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
  24 Boston, MA 02111-1307, USA.  */
  25
  26 /*** TABLE OF CONTENTS ***
  27
  28   0. General comments
  29   1. Preamble
  30   2. Emacs' internal format (emacs-utf-8) handlers
  31   3. UTF-8 handlers
  32   4. UTF-16 handlers
  33   5. Charset-base coding systems handlers
  34   6. emacs-mule (old Emacs' internal format) handlers
  35   7. ISO2022 handlers
  36   8. Shift-JIS and BIG5 handlers
  37   9. CCL handlers
  38   10. C library functions
  39   11. Emacs Lisp library functions
  40   12. Postamble
  41
  42 */
  43
  44 /*** 0. General comments ***
  45
  46
  47 CODING SYSTEM
  48
  49   A coding system is an object for an encoding mechanism that contains
  50   information about how to convert byte sequences to character
  51   sequences and vice versa.  When we say "decode", it means converting
  52   a byte sequence of a specific coding system into a character
  53   sequence that is represented by Emacs' internal coding system
  54   `emacs-utf-8', and when we say "encode", it means converting a
  55   character sequence of emacs-utf-8 to a byte sequence of a specific
  56   coding system.
  57
  58   In Emacs Lisp, a coding system is represented by a Lisp symbol.  In
  59   C level, a coding system is represented by a vector of attributes
  60   stored in the hash table Vcharset_hash_table.  The conversion from
  61   coding system symbol to attributes vector is done by looking up
  62   Vcharset_hash_table by the symbol.
  63
  64   Coding systems are classified into the following types depending on
  65   the encoding mechanism.  Here's a brief description of the types.
  66
  67   o UTF-8
  68
  69   o UTF-16
  70
  71   o Charset-base coding system
  72
  73   A coding system defined by one or more (coded) character sets.
  74   Decoding and encoding are done by a code converter defined for each
  75   character set.
  76
  77   o Old Emacs internal format (emacs-mule)
  78
  79   The coding system adopted by old versions of Emacs (20 and 21).
  80
  81   o ISO2022-base coding system
  82
  83   The most famous coding system for multiple character sets.  X's
  84   Compound Text, various EUCs (Extended Unix Code), and coding systems
  85   used in the Internet communication such as ISO-2022-JP are all
  86   variants of ISO2022.
  87
  88   o SJIS (or Shift-JIS or MS-Kanji-Code)
  89
  90   A coding system to encode character sets: ASCII, JISX0201, and
  91   JISX0208.  Widely used for PC's in Japan.  Details are described in
  92   section 8.
  93
  94   o BIG5
  95
  96   A coding system to encode character sets: ASCII and Big5.  Widely
  97   used for Chinese (mainly in Taiwan and Hong Kong).  Details are
  98   described in section 8.  In this file, when we write "big5" (all
  99   lowercase), we mean the coding system, and when we write "Big5"
 100   (capitalized), we mean the character set.
 101
 102   o CCL
 103
 104   If a user wants to decode/encode text encoded in a coding system
 105   not listed above, he can supply a decoder and an encoder for it in
 106   CCL (Code Conversion Language) programs.  Emacs executes the CCL
 107   program while decoding/encoding.
 108
 109   o Raw-text
 110
 111   A coding system for text containing raw eight-bit data.  Emacs
 112   treats each byte of source text as a character (except for
 113   end-of-line conversion).
 114
 115   o No-conversion
 116
 117   Like raw text, but don't do end-of-line conversion.
 118
 119
 120 END-OF-LINE FORMAT
 121
 122   How text end-of-line is encoded depends on operating system.  For
 123   instance, Unix's format is just one byte of LF (line-feed) code,
 124   whereas DOS's format is two-byte sequence of `carriage-return' and
 125   `line-feed' codes.  MacOS's format is usually one byte of
 126   `carriage-return'.
 127
 128   Since text character encoding and end-of-line encoding are
 129   independent, any coding system described above can take any format
 130   of end-of-line (except for no-conversion).
 131
 132 STRUCT CODING_SYSTEM
 133
 134   Before using a coding system for code conversion (i.e. decoding and
 135   encoding), we setup a structure of type `struct coding_system'.
 136   This structure keeps various information about a specific code
 137   conversion (e.g. the location of source and destination data).
 138
 139 */
 140
 141 /* COMMON MACROS */
 142
 143
 144 /*** GENERAL NOTES on `detect_coding_XXX ()' functions ***
 145
 146   These functions check if a byte sequence specified as a source in
 147   CODING conforms to the format of XXX, and update the members of
 148   DETECT_INFO.
 149
 150   Return 1 if the byte sequence conforms to XXX, otherwise return 0.
 151
 152   Below is the template of these functions.  */
 153
 154 #if 0
 155 static int
 156 detect_coding_XXX (coding, detect_info)
 157      struct coding_system *coding;
 158      struct coding_detection_info *detect_info;
 159 {
 160   unsigned char *src = coding->source;
 161   unsigned char *src_end = coding->source + coding->src_bytes;
 162   int multibytep = coding->src_multibyte;
 163   int consumed_chars = 0;
 164   int found = 0;
 165   ...;
 166
 167   while (1)
 168     {
 169       /* Get one byte from the source.  If the souce is exausted, jump
 170          to no_more_source:.  */
 171       ONE_MORE_BYTE (c);
 172
 173       if (! __C_conforms_to_XXX___ (c))
 174         break;
 175       if (! __C_strongly_suggests_XXX__ (c))
 176         found = CATEGORY_MASK_XXX;
 177     }
 178   /* The byte sequence is invalid for XXX.  */
 179   detect_info->rejected |= CATEGORY_MASK_XXX;
 180   return 0;
 181
 182  no_more_source:
 183   /* The source exausted successfully.  */
 184   detect_info->found |= found;
 185   return 1;
 186 }
 187 #endif
 188
 189 /*** GENERAL NOTES on `decode_coding_XXX ()' functions ***
 190
 191   These functions decode a byte sequence specified as a source by
 192   CODING.  The resulting multibyte text goes to a place pointed to by
 193   CODING->charbuf, the length of which should not exceed
 194   CODING->charbuf_size;
 195
 196   These functions set the information of original and decoded texts in
 197   CODING->consumed, CODING->consumed_char, and CODING->charbuf_used.
 198   They also set CODING->result to one of CODING_RESULT_XXX indicating
 199   how the decoding is finished.
 200
 201   Below is the template of these functions.  */
 202
 203 #if 0
 204 static void
 205 decode_coding_XXXX (coding)
 206      struct coding_system *coding;
 207 {
 208   unsigned char *src = coding->source + coding->consumed;
 209   unsigned char *src_end = coding->source + coding->src_bytes;
 210   /* SRC_BASE remembers the start position in source in each loop.
 211      The loop will be exited when there's not enough source code, or
 212      when there's no room in CHARBUF for a decoded character.  */
 213   unsigned char *src_base;
 214   /* A buffer to produce decoded characters.  */
 215   int *charbuf = coding->charbuf;
 216   int *charbuf_end = charbuf + coding->charbuf_size;
 217   int multibytep = coding->src_multibyte;
 218
 219   while (1)
 220     {
 221       src_base = src;
 222       if (charbuf < charbuf_end)
 223         /* No more room to produce a decoded character.  */
 224         break;
 225       ONE_MORE_BYTE (c);
 226       /* Decode it. */
 227     }
 228
 229  no_more_source:
 230   if (src_base < src_end
 231       && coding->mode & CODING_MODE_LAST_BLOCK)
 232     /* If the source ends by partial bytes to construct a character,
 233        treat them as eight-bit raw data.  */
 234     while (src_base < src_end && charbuf < charbuf_end)
 235       *charbuf++ = *src_base++;
 236   /* Remember how many bytes and characters we consumed.  If the
 237      source is multibyte, the bytes and chars are not identical.  */
 238   coding->consumed = coding->consumed_char = src_base - coding->source;
 239   /* Remember how many characters we produced.  */
 240   coding->charbuf_used = charbuf - coding->charbuf;
 241 }
 242 #endif
 243
 244 /*** GENERAL NOTES on `encode_coding_XXX ()' functions ***
 245
 246   These functions encode SRC_BYTES length text at SOURCE of Emacs'
 247   internal multibyte format by CODING.  The resulting byte sequence
 248   goes to a place pointed to by DESTINATION, the length of which
 249   should not exceed DST_BYTES.
 250
 251   These functions set the information of original and encoded texts in
 252   the members produced, produced_char, consumed, and consumed_char of
 253   the structure *CODING.  They also set the member result to one of
 254   CODING_RESULT_XXX indicating how the encoding finished.
 255
 256   DST_BYTES zero means that source area and destination area are
 257   overlapped, which means that we can produce a encoded text until it
 258   reaches at the head of not-yet-encoded source text.
 259
 260   Below is a template of these functions.  */
 261 #if 0
 262 static void
 263 encode_coding_XXX (coding)
 264      struct coding_system *coding;
 265 {
 266   int multibytep = coding->dst_multibyte;
 267   int *charbuf = coding->charbuf;
 268   int *charbuf_end = charbuf->charbuf + coding->charbuf_used;
 269   unsigned char *dst = coding->destination + coding->produced;
 270   unsigned char *dst_end = coding->destination + coding->dst_bytes;
 271   unsigned char *adjusted_dst_end = dst_end - _MAX_BYTES_PRODUCED_IN_LOOP_;
 272   int produced_chars = 0;
 273
 274   for (; charbuf < charbuf_end && dst < adjusted_dst_end; charbuf++)
 275     {
 276       int c = *charbuf;
 277       /* Encode C into DST, and increment DST.  */
 278     }
 279  label_no_more_destination:
 280   /* How many chars and bytes we produced.  */
 281   coding->produced_char += produced_chars;
 282   coding->produced = dst - coding->destination;
 283 }
 284 #endif
 285
 286 \f
 287 /*** 1. Preamble ***/
 288
 289 #include <config.h>
 290 #include <stdio.h>
 291
 292 #include "lisp.h"
 293 #include "buffer.h"
 294 #include "character.h"
 295 #include "charset.h"
 296 #include "ccl.h"
 297 #include "composite.h"
 298 #include "coding.h"
 299 #include "window.h"
 300
 301 Lisp_Object Vcoding_system_hash_table;
 302
 303 Lisp_Object Qcoding_system, Qcoding_aliases, Qeol_type;
 304 Lisp_Object Qunix, Qdos;
 305 extern Lisp_Object Qmac;        /* frame.c */
 306 Lisp_Object Qbuffer_file_coding_system;
 307 Lisp_Object Qpost_read_conversion, Qpre_write_conversion;
 308 Lisp_Object Qdefault_char;
 309 Lisp_Object Qno_conversion, Qundecided;
 310 Lisp_Object Qcharset, Qiso_2022, Qutf_8, Qutf_16, Qshift_jis, Qbig5;
 311 Lisp_Object Qbig, Qlittle;
 312 Lisp_Object Qcoding_system_history;
 313 Lisp_Object Qvalid_codes;
 314 Lisp_Object QCcategory;
 315
 316 extern Lisp_Object Qinsert_file_contents, Qwrite_region;
 317 Lisp_Object Qcall_process, Qcall_process_region, Qprocess_argument;
 318 Lisp_Object Qstart_process, Qopen_network_stream;
 319 Lisp_Object Qtarget_idx;
 320
 321 int coding_system_require_warning;
 322
 323 Lisp_Object Vselect_safe_coding_system_function;
 324
 325 /* Mnemonic string for each format of end-of-line.  */
 326 Lisp_Object eol_mnemonic_unix, eol_mnemonic_dos, eol_mnemonic_mac;
 327 /* Mnemonic string to indicate format of end-of-line is not yet
 328    decided.  */
 329 Lisp_Object eol_mnemonic_undecided;
 330
 331 #ifdef emacs
 332
 333 Lisp_Object Vcoding_system_list, Vcoding_system_alist;
 334
 335 Lisp_Object Qcoding_system_p, Qcoding_system_error;
 336
 337 /* Coding system emacs-mule and raw-text are for converting only
 338    end-of-line format.  */
 339 Lisp_Object Qemacs_mule, Qraw_text;
 340 Lisp_Object Qutf_8_emacs;
 341
 342 /* Coding-systems are handed between Emacs Lisp programs and C internal
 343    routines by the following three variables.  */
 344 /* Coding-system for reading files and receiving data from process.  */
 345 Lisp_Object Vcoding_system_for_read;
 346 /* Coding-system for writing files and sending data to process.  */
 347 Lisp_Object Vcoding_system_for_write;
 348 /* Coding-system actually used in the latest I/O.  */
 349 Lisp_Object Vlast_coding_system_used;
 350
 351 /* A vector of length 256 which contains information about special
 352    Latin codes (especially for dealing with Microsoft codes).  */
 353 Lisp_Object Vlatin_extra_code_table;
 354
 355 /* Flag to inhibit code conversion of end-of-line format.  */
 356 int inhibit_eol_conversion;
 357
 358 /* Flag to inhibit ISO2022 escape sequence detection.  */
 359 int inhibit_iso_escape_detection;
 360
 361 /* Flag to make buffer-file-coding-system inherit from process-coding.  */
 362 int inherit_process_coding_system;
 363
 364 /* Coding system to be used to encode text for terminal display.  */
 365 struct coding_system terminal_coding;
 366
 367 /* Coding system to be used to encode text for terminal display when
 368    terminal coding system is nil.  */
 369 struct coding_system safe_terminal_coding;
 370
 371 /* Coding system of what is sent from terminal keyboard.  */
 372 struct coding_system keyboard_coding;
 373
 374 Lisp_Object Vfile_coding_system_alist;
 375 Lisp_Object Vprocess_coding_system_alist;
 376 Lisp_Object Vnetwork_coding_system_alist;
 377
 378 Lisp_Object Vlocale_coding_system;
 379
 380 #endif /* emacs */
 381
 382 /* Flag to tell if we look up translation table on character code
 383    conversion.  */
 384 Lisp_Object Venable_character_translation;
 385 /* Standard translation table to look up on decoding (reading).  */
 386 Lisp_Object Vstandard_translation_table_for_decode;
 387 /* Standard translation table to look up on encoding (writing).  */
 388 Lisp_Object Vstandard_translation_table_for_encode;
 389
 390 Lisp_Object Qtranslation_table;
 391 Lisp_Object Qtranslation_table_id;
 392 Lisp_Object Qtranslation_table_for_decode;
 393 Lisp_Object Qtranslation_table_for_encode;
 394
 395 /* Alist of charsets vs revision number.  */
 396 static Lisp_Object Vcharset_revision_table;
 397
 398 /* Default coding systems used for process I/O.  */
 399 Lisp_Object Vdefault_process_coding_system;
 400
 401 /* Char table for translating Quail and self-inserting input.  */
 402 Lisp_Object Vtranslation_table_for_input;
 403
 404 /* Two special coding systems.  */
 405 Lisp_Object Vsjis_coding_system;
 406 Lisp_Object Vbig5_coding_system;
 407
 408
 409 static int detect_coding_utf_8 P_ ((struct coding_system *,
 410                                     struct coding_detection_info *info));
 411 static void decode_coding_utf_8 P_ ((struct coding_system *));
 412 static int encode_coding_utf_8 P_ ((struct coding_system *));
 413
 414 static int detect_coding_utf_16 P_ ((struct coding_system *,
 415                                      struct coding_detection_info *info));
 416 static void decode_coding_utf_16 P_ ((struct coding_system *));
 417 static int encode_coding_utf_16 P_ ((struct coding_system *));
 418
 419 static int detect_coding_iso_2022 P_ ((struct coding_system *,
 420                                        struct coding_detection_info *info));
 421 static void decode_coding_iso_2022 P_ ((struct coding_system *));
 422 static int encode_coding_iso_2022 P_ ((struct coding_system *));
 423
 424 static int detect_coding_emacs_mule P_ ((struct coding_system *,
 425                                          struct coding_detection_info *info));
 426 static void decode_coding_emacs_mule P_ ((struct coding_system *));
 427 static int encode_coding_emacs_mule P_ ((struct coding_system *));
 428
 429 static int detect_coding_sjis P_ ((struct coding_system *,
 430                                    struct coding_detection_info *info));
 431 static void decode_coding_sjis P_ ((struct coding_system *));
 432 static int encode_coding_sjis P_ ((struct coding_system *));
 433
 434 static int detect_coding_big5 P_ ((struct coding_system *,
 435                                    struct coding_detection_info *info));
 436 static void decode_coding_big5 P_ ((struct coding_system *));
 437 static int encode_coding_big5 P_ ((struct coding_system *));
 438
 439 static int detect_coding_ccl P_ ((struct coding_system *,
 440                                   struct coding_detection_info *info));
 441 static void decode_coding_ccl P_ ((struct coding_system *));
 442 static int encode_coding_ccl P_ ((struct coding_system *));
 443
 444 static void decode_coding_raw_text P_ ((struct coding_system *));
 445 static int encode_coding_raw_text P_ ((struct coding_system *));
 446
 447
 448 /* ISO2022 section */
 449
 450 #define CODING_ISO_INITIAL(coding, reg)                 \
 451   (XINT (AREF (AREF (CODING_ID_ATTRS ((coding)->id),    \
 452                      coding_attr_iso_initial),          \
 453                reg)))
 454
 455
 456 #define CODING_ISO_REQUEST(coding, charset_id)  \
 457   ((charset_id <= (coding)->max_charset_id      \
 458     ? (coding)->safe_charsets[charset_id]       \
 459     : -1))
 460
 461
 462 #define CODING_ISO_FLAGS(coding)        \
 463   ((coding)->spec.iso_2022.flags)
 464 #define CODING_ISO_DESIGNATION(coding, reg)     \
 465   ((coding)->spec.iso_2022.current_designation[reg])
 466 #define CODING_ISO_INVOCATION(coding, plane)    \
 467   ((coding)->spec.iso_2022.current_invocation[plane])
 468 #define CODING_ISO_SINGLE_SHIFTING(coding)      \
 469   ((coding)->spec.iso_2022.single_shifting)
 470 #define CODING_ISO_BOL(coding)  \
 471   ((coding)->spec.iso_2022.bol)
 472 #define CODING_ISO_INVOKED_CHARSET(coding, plane)       \
 473   CODING_ISO_DESIGNATION ((coding), CODING_ISO_INVOCATION ((coding), (plane)))
 474
 475 /* Control characters of ISO2022.  */
 476                         /* code */      /* function */
 477 #define ISO_CODE_LF     0x0A            /* line-feed */
 478 #define ISO_CODE_CR     0x0D            /* carriage-return */
 479 #define ISO_CODE_SO     0x0E            /* shift-out */
 480 #define ISO_CODE_SI     0x0F            /* shift-in */
 481 #define ISO_CODE_SS2_7  0x19            /* single-shift-2 for 7-bit code */
 482 #define ISO_CODE_ESC    0x1B            /* escape */
 483 #define ISO_CODE_SS2    0x8E            /* single-shift-2 */
 484 #define ISO_CODE_SS3    0x8F            /* single-shift-3 */
 485 #define ISO_CODE_CSI    0x9B            /* control-sequence-introducer */
 486
 487 /* All code (1-byte) of ISO2022 is classified into one of the
 488    followings.  */
 489 enum iso_code_class_type
 490   {
 491     ISO_control_0,              /* Control codes in the range
 492                                    0x00..0x1F and 0x7F, except for the
 493                                    following 5 codes.  */
 494     ISO_carriage_return,        /* ISO_CODE_CR (0x0D) */
 495     ISO_shift_out,              /* ISO_CODE_SO (0x0E) */
 496     ISO_shift_in,               /* ISO_CODE_SI (0x0F) */
 497     ISO_single_shift_2_7,       /* ISO_CODE_SS2_7 (0x19) */
 498     ISO_escape,                 /* ISO_CODE_SO (0x1B) */
 499     ISO_control_1,              /* Control codes in the range
 500                                    0x80..0x9F, except for the
 501                                    following 3 codes.  */
 502     ISO_single_shift_2,         /* ISO_CODE_SS2 (0x8E) */
 503     ISO_single_shift_3,         /* ISO_CODE_SS3 (0x8F) */
 504     ISO_control_sequence_introducer, /* ISO_CODE_CSI (0x9B) */
 505     ISO_0x20_or_0x7F,           /* Codes of the values 0x20 or 0x7F.  */
 506     ISO_graphic_plane_0,        /* Graphic codes in the range 0x21..0x7E.  */
 507     ISO_0xA0_or_0xFF,           /* Codes of the values 0xA0 or 0xFF.  */
 508     ISO_graphic_plane_1         /* Graphic codes in the range 0xA1..0xFE.  */
 509   };
 510
 511 /** The macros CODING_ISO_FLAG_XXX defines a flag bit of the
 512     `iso-flags' attribute of an iso2022 coding system.  */
 513
 514 /* If set, produce long-form designation sequence (e.g. ESC $ ( A)
 515    instead of the correct short-form sequence (e.g. ESC $ A).  */
 516 #define CODING_ISO_FLAG_LONG_FORM       0x0001
 517
 518 /* If set, reset graphic planes and registers at end-of-line to the
 519    initial state.  */
 520 #define CODING_ISO_FLAG_RESET_AT_EOL    0x0002
 521
 522 /* If set, reset graphic planes and registers before any control
 523    characters to the initial state.  */
 524 #define CODING_ISO_FLAG_RESET_AT_CNTL   0x0004
 525
 526 /* If set, encode by 7-bit environment.  */
 527 #define CODING_ISO_FLAG_SEVEN_BITS      0x0008
 528
 529 /* If set, use locking-shift function.  */
 530 #define CODING_ISO_FLAG_LOCKING_SHIFT   0x0010
 531
 532 /* If set, use single-shift function.  Overwrite
 533    CODING_ISO_FLAG_LOCKING_SHIFT.  */
 534 #define CODING_ISO_FLAG_SINGLE_SHIFT    0x0020
 535
 536 /* If set, use designation escape sequence.  */
 537 #define CODING_ISO_FLAG_DESIGNATION     0x0040
 538
 539 /* If set, produce revision number sequence.  */
 540 #define CODING_ISO_FLAG_REVISION        0x0080
 541
 542 /* If set, produce ISO6429's direction specifying sequence.  */
 543 #define CODING_ISO_FLAG_DIRECTION       0x0100
 544
 545 /* If set, assume designation states are reset at beginning of line on
 546    output.  */
 547 #define CODING_ISO_FLAG_INIT_AT_BOL     0x0200
 548
 549 /* If set, designation sequence should be placed at beginning of line
 550    on output.  */
 551 #define CODING_ISO_FLAG_DESIGNATE_AT_BOL 0x0400
 552
 553 /* If set, do not encode unsafe charactes on output.  */
 554 #define CODING_ISO_FLAG_SAFE            0x0800
 555
 556 /* If set, extra latin codes (128..159) are accepted as a valid code
 557    on input.  */
 558 #define CODING_ISO_FLAG_LATIN_EXTRA     0x1000
 559
 560 #define CODING_ISO_FLAG_COMPOSITION     0x2000
 561
 562 #define CODING_ISO_FLAG_EUC_TW_SHIFT    0x4000
 563
 564 #define CODING_ISO_FLAG_USE_ROMAN       0x8000
 565
 566 #define CODING_ISO_FLAG_USE_OLDJIS      0x10000
 567
 568 #define CODING_ISO_FLAG_FULL_SUPPORT    0x100000
 569
 570 /* A character to be produced on output if encoding of the original
 571    character is prohibited by CODING_ISO_FLAG_SAFE.  */
 572 #define CODING_INHIBIT_CHARACTER_SUBSTITUTION  '?'
 573
 574
 575 /* UTF-16 section */
 576 #define CODING_UTF_16_BOM(coding)       \
 577   ((coding)->spec.utf_16.bom)
 578
 579 #define CODING_UTF_16_ENDIAN(coding)    \
 580   ((coding)->spec.utf_16.endian)
 581
 582 #define CODING_UTF_16_SURROGATE(coding) \
 583   ((coding)->spec.utf_16.surrogate)
 584
 585
 586 /* CCL section */
 587 #define CODING_CCL_DECODER(coding)      \
 588   AREF (CODING_ID_ATTRS ((coding)->id), coding_attr_ccl_decoder)
 589 #define CODING_CCL_ENCODER(coding)      \
 590   AREF (CODING_ID_ATTRS ((coding)->id), coding_attr_ccl_encoder)
 591 #define CODING_CCL_VALIDS(coding)                                          \
 592   (SDATA (AREF (CODING_ID_ATTRS ((coding)->id), coding_attr_ccl_valids)))
 593
 594 /* Index for each coding category in `coding_categories' */
 595
 596 enum coding_category
 597   {
 598     coding_category_iso_7,
 599     coding_category_iso_7_tight,
 600     coding_category_iso_8_1,
 601     coding_category_iso_8_2,
 602     coding_category_iso_7_else,
 603     coding_category_iso_8_else,
 604     coding_category_utf_8,
 605     coding_category_utf_16_auto,
 606     coding_category_utf_16_be,
 607     coding_category_utf_16_le,
 608     coding_category_utf_16_be_nosig,
 609     coding_category_utf_16_le_nosig,
 610     coding_category_charset,
 611     coding_category_sjis,
 612     coding_category_big5,
 613     coding_category_ccl,
 614     coding_category_emacs_mule,
 615     /* All above are targets of code detection.  */
 616     coding_category_raw_text,
 617     coding_category_undecided,
 618     coding_category_max
 619   };
 620
 621 /* Definitions of flag bits used in detect_coding_XXXX.  */
 622 #define CATEGORY_MASK_ISO_7             (1 << coding_category_iso_7)
 623 #define CATEGORY_MASK_ISO_7_TIGHT       (1 << coding_category_iso_7_tight)
 624 #define CATEGORY_MASK_ISO_8_1           (1 << coding_category_iso_8_1)
 625 #define CATEGORY_MASK_ISO_8_2           (1 << coding_category_iso_8_2)
 626 #define CATEGORY_MASK_ISO_7_ELSE        (1 << coding_category_iso_7_else)
 627 #define CATEGORY_MASK_ISO_8_ELSE        (1 << coding_category_iso_8_else)
 628 #define CATEGORY_MASK_UTF_8             (1 << coding_category_utf_8)
 629 #define CATEGORY_MASK_UTF_16_AUTO       (1 << coding_category_utf_16_auto)
 630 #define CATEGORY_MASK_UTF_16_BE         (1 << coding_category_utf_16_be)
 631 #define CATEGORY_MASK_UTF_16_LE         (1 << coding_category_utf_16_le)
 632 #define CATEGORY_MASK_UTF_16_BE_NOSIG   (1 << coding_category_utf_16_be_nosig)
 633 #define CATEGORY_MASK_UTF_16_LE_NOSIG   (1 << coding_category_utf_16_le_nosig)
 634 #define CATEGORY_MASK_CHARSET           (1 << coding_category_charset)
 635 #define CATEGORY_MASK_SJIS              (1 << coding_category_sjis)
 636 #define CATEGORY_MASK_BIG5              (1 << coding_category_big5)
 637 #define CATEGORY_MASK_CCL               (1 << coding_category_ccl)
 638 #define CATEGORY_MASK_EMACS_MULE        (1 << coding_category_emacs_mule)
 639 #define CATEGORY_MASK_RAW_TEXT          (1 << coding_category_raw_text)
 640
 641 /* This value is returned if detect_coding_mask () find nothing other
 642    than ASCII characters.  */
 643 #define CATEGORY_MASK_ANY               \
 644   (CATEGORY_MASK_ISO_7                  \
 645    | CATEGORY_MASK_ISO_7_TIGHT          \
 646    | CATEGORY_MASK_ISO_8_1              \
 647    | CATEGORY_MASK_ISO_8_2              \
 648    | CATEGORY_MASK_ISO_7_ELSE           \
 649    | CATEGORY_MASK_ISO_8_ELSE           \
 650    | CATEGORY_MASK_UTF_8                \
 651    | CATEGORY_MASK_UTF_16_BE            \
 652    | CATEGORY_MASK_UTF_16_LE            \
 653    | CATEGORY_MASK_UTF_16_BE_NOSIG      \
 654    | CATEGORY_MASK_UTF_16_LE_NOSIG      \
 655    | CATEGORY_MASK_CHARSET              \
 656    | CATEGORY_MASK_SJIS                 \
 657    | CATEGORY_MASK_BIG5                 \
 658    | CATEGORY_MASK_CCL                  \
 659    | CATEGORY_MASK_EMACS_MULE)
 660
 661
 662 #define CATEGORY_MASK_ISO_7BIT \
 663   (CATEGORY_MASK_ISO_7 | CATEGORY_MASK_ISO_7_TIGHT)
 664
 665 #define CATEGORY_MASK_ISO_8BIT \
 666   (CATEGORY_MASK_ISO_8_1 | CATEGORY_MASK_ISO_8_2)
 667
 668 #define CATEGORY_MASK_ISO_ELSE \
 669   (CATEGORY_MASK_ISO_7_ELSE | CATEGORY_MASK_ISO_8_ELSE)
 670
 671 #define CATEGORY_MASK_ISO_ESCAPE        \
 672   (CATEGORY_MASK_ISO_7                  \
 673    | CATEGORY_MASK_ISO_7_TIGHT          \
 674    | CATEGORY_MASK_ISO_7_ELSE           \
 675    | CATEGORY_MASK_ISO_8_ELSE)
 676
 677 #define CATEGORY_MASK_ISO       \
 678   (  CATEGORY_MASK_ISO_7BIT     \
 679      | CATEGORY_MASK_ISO_8BIT   \
 680      | CATEGORY_MASK_ISO_ELSE)
 681
 682 #define CATEGORY_MASK_UTF_16            \
 683   (CATEGORY_MASK_UTF_16_BE              \
 684    | CATEGORY_MASK_UTF_16_LE            \
 685    | CATEGORY_MASK_UTF_16_BE_NOSIG      \
 686    | CATEGORY_MASK_UTF_16_LE_NOSIG)
 687
 688
 689 /* List of symbols `coding-category-xxx' ordered by priority.  This
 690    variable is exposed to Emacs Lisp.  */
 691 static Lisp_Object Vcoding_category_list;
 692
 693 /* Table of coding categories (Lisp symbols).  This variable is for
 694    internal use oly.  */
 695 static Lisp_Object Vcoding_category_table;
 696
 697 /* Table of coding-categories ordered by priority.  */
 698 static enum coding_category coding_priorities[coding_category_max];
 699
 700 /* Nth element is a coding context for the coding system bound to the
 701    Nth coding category.  */
 702 static struct coding_system coding_categories[coding_category_max];
 703
 704 /*** Commonly used macros and functions ***/
 705
 706 #ifndef min
 707 #define min(a, b) ((a) < (b) ? (a) : (b))
 708 #endif
 709 #ifndef max
 710 #define max(a, b) ((a) > (b) ? (a) : (b))
 711 #endif
 712
 713 #define CODING_GET_INFO(coding, attrs, eol_type, charset_list)  \
 714   do {                                                          \
 715     attrs = CODING_ID_ATTRS (coding->id);                       \
 716     eol_type = CODING_ID_EOL_TYPE (coding->id);                 \
 717     if (VECTORP (eol_type))                                     \
 718       eol_type = Qunix;                                         \
 719     charset_list = CODING_ATTR_CHARSET_LIST (attrs);            \
 720   } while (0)
 721
 722
 723 /* Safely get one byte from the source text pointed by SRC which ends
 724    at SRC_END, and set C to that byte.  If there are not enough bytes
 725    in the source, it jumps to `no_more_source'.  The caller
 726    should declare and set these variables appropriately in advance:
 727         src, src_end, multibytep
 728 */
 729
 730 #define ONE_MORE_BYTE(c)                                        \
 731   do {                                                          \
 732     if (src == src_end)                                         \
 733       {                                                         \
 734         if (src_base < src)                                     \
 735           coding->result = CODING_RESULT_INSUFFICIENT_SRC;      \
 736         goto no_more_source;                                    \
 737       }                                                         \
 738     c = *src++;                                                 \
 739     if (multibytep && (c & 0x80))                               \
 740       {                                                         \
 741         if ((c & 0xFE) != 0xC0)                                 \
 742           error ("Undecodable char found");                     \
 743         c = ((c & 1) << 6) | *src++;                            \
 744       }                                                         \
 745     consumed_chars++;                                           \
 746   } while (0)
 747
 748
 749 #define ONE_MORE_BYTE_NO_CHECK(c)               \
 750   do {                                          \
 751     c = *src++;                                 \
 752     if (multibytep && (c & 0x80))               \
 753       {                                         \
 754         if ((c & 0xFE) != 0xC0)                 \
 755           error ("Undecodable char found");     \
 756         c = ((c & 1) << 6) | *src++;            \
 757       }                                         \
 758     consumed_chars++;                           \
 759   } while (0)
 760
 761
 762 /* Store a byte C in the place pointed by DST and increment DST to the
 763    next free point, and increment PRODUCED_CHARS.  The caller should
 764    assure that C is 0..127, and declare and set the variable `dst'
 765    appropriately in advance.
 766 */
 767
 768
 769 #define EMIT_ONE_ASCII_BYTE(c)  \
 770   do {                          \
 771     produced_chars++;           \
 772     *dst++ = (c);               \
 773   } while (0)
 774
 775
 776 /* Like EMIT_ONE_ASCII_BYTE byt store two bytes; C1 and C2.  */
 777
 778 #define EMIT_TWO_ASCII_BYTES(c1, c2)    \
 779   do {                                  \
 780     produced_chars += 2;                \
 781     *dst++ = (c1), *dst++ = (c2);       \
 782   } while (0)
 783
 784
 785 /* Store a byte C in the place pointed by DST and increment DST to the
 786    next free point, and increment PRODUCED_CHARS.  If MULTIBYTEP is
 787    nonzero, store in an appropriate multibyte from.  The caller should
 788    declare and set the variables `dst' and `multibytep' appropriately
 789    in advance.  */
 790
 791 #define EMIT_ONE_BYTE(c)                \
 792   do {                                  \
 793     produced_chars++;                   \
 794     if (multibytep)                     \
 795       {                                 \
 796         int ch = (c);                   \
 797         if (ch >= 0x80)                 \
 798           ch = BYTE8_TO_CHAR (ch);      \
 799         CHAR_STRING_ADVANCE (ch, dst);  \
 800       }                                 \
 801     else                                \
 802       *dst++ = (c);                     \
 803   } while (0)
 804
 805
 806 /* Like EMIT_ONE_BYTE, but emit two bytes; C1 and C2.  */
 807
 808 #define EMIT_TWO_BYTES(c1, c2)          \
 809   do {                                  \
 810     produced_chars += 2;                \
 811     if (multibytep)                     \
 812       {                                 \
 813         int ch;                         \
 814                                         \
 815         ch = (c1);                      \
 816         if (ch >= 0x80)                 \
 817           ch = BYTE8_TO_CHAR (ch);      \
 818         CHAR_STRING_ADVANCE (ch, dst);  \
 819         ch = (c2);                      \
 820         if (ch >= 0x80)                 \
 821           ch = BYTE8_TO_CHAR (ch);      \
 822         CHAR_STRING_ADVANCE (ch, dst);  \
 823       }                                 \
 824     else                                \
 825       {                                 \
 826         *dst++ = (c1);                  \
 827         *dst++ = (c2);                  \
 828       }                                 \
 829   } while (0)
 830
 831
 832 #define EMIT_THREE_BYTES(c1, c2, c3)    \
 833   do {                                  \
 834     EMIT_ONE_BYTE (c1);                 \
 835     EMIT_TWO_BYTES (c2, c3);            \
 836   } while (0)
 837
 838
 839 #define EMIT_FOUR_BYTES(c1, c2, c3, c4)         \
 840   do {                                          \
 841     EMIT_TWO_BYTES (c1, c2);                    \
 842     EMIT_TWO_BYTES (c3, c4);                    \
 843   } while (0)
 844
 845
 846 #define CODING_DECODE_CHAR(coding, src, src_base, src_end, charset, code, c) \
 847   do {                                                                       \
 848     charset_map_loaded = 0;                                                  \
 849     c = DECODE_CHAR (charset, code);                                         \
 850     if (charset_map_loaded)                                                  \
 851       {                                                                      \
 852         const unsigned char *orig = coding->source;                          \
 853         EMACS_INT offset;                                                    \
 854                                                                              \
 855         coding_set_source (coding);                                          \
 856         offset = coding->source - orig;                                      \
 857         src += offset;                                                       \
 858         src_base += offset;                                                  \
 859         src_end += offset;                                                   \
 860       }                                                                      \
 861   } while (0)
 862
 863
 864 #define ASSURE_DESTINATION(bytes)                               \
 865   do {                                                          \
 866     if (dst + (bytes) >= dst_end)                               \
 867       {                                                         \
 868         int more_bytes = charbuf_end - charbuf + (bytes);       \
 869                                                                 \
 870         dst = alloc_destination (coding, more_bytes, dst);      \
 871         dst_end = coding->destination + coding->dst_bytes;      \
 872       }                                                         \
 873   } while (0)
 874
 875
 876
 877 static void
 878 coding_set_source (coding)
 879      struct coding_system *coding;
 880 {
 881   if (BUFFERP (coding->src_object))
 882     {
 883       struct buffer *buf = XBUFFER (coding->src_object);
 884
 885       if (coding->src_pos < 0)
 886         coding->source = BUF_GAP_END_ADDR (buf) + coding->src_pos_byte;
 887       else
 888         coding->source = BUF_BYTE_ADDRESS (buf, coding->src_pos_byte);
 889     }
 890   else if (STRINGP (coding->src_object))
 891     {
 892       coding->source = SDATA (coding->src_object) + coding->src_pos_byte;
 893     }
 894   else
 895     /* Otherwise, the source is C string and is never relocated
 896        automatically.  Thus we don't have to update anything.  */
 897     ;
 898 }
 899
 900 static void
 901 coding_set_destination (coding)
 902      struct coding_system *coding;
 903 {
 904   if (BUFFERP (coding->dst_object))
 905     {
 906       if (coding->src_pos < 0)
 907         {
 908           coding->destination = BEG_ADDR + coding->dst_pos_byte - 1;
 909           coding->dst_bytes = (GAP_END_ADDR
 910                                - (coding->src_bytes - coding->consumed)
 911                                - coding->destination);
 912         }
 913       else
 914         {
 915           /* We are sure that coding->dst_pos_byte is before the gap
 916              of the buffer. */
 917           coding->destination = (BUF_BEG_ADDR (XBUFFER (coding->dst_object))
 918                                  + coding->dst_pos_byte - 1);
 919           coding->dst_bytes = (BUF_GAP_END_ADDR (XBUFFER (coding->dst_object))
 920                                - coding->destination);
 921         }
 922     }
 923   else
 924     /* Otherwise, the destination is C string and is never relocated
 925        automatically.  Thus we don't have to update anything.  */
 926     ;
 927 }
 928
 929
 930 static void
 931 coding_alloc_by_realloc (coding, bytes)
 932      struct coding_system *coding;
 933      EMACS_INT bytes;
 934 {
 935   coding->destination = (unsigned char *) xrealloc (coding->destination,
 936                                                     coding->dst_bytes + bytes);
 937   coding->dst_bytes += bytes;
 938 }
 939
 940 static void
 941 coding_alloc_by_making_gap (coding, bytes)
 942      struct coding_system *coding;
 943      EMACS_INT bytes;
 944 {
 945   if (BUFFERP (coding->dst_object)
 946       && EQ (coding->src_object, coding->dst_object))
 947     {
 948       EMACS_INT add = coding->src_bytes - coding->consumed;
 949
 950       GAP_SIZE -= add; ZV += add; Z += add; ZV_BYTE += add; Z_BYTE += add;
 951       make_gap (bytes);
 952       GAP_SIZE += add; ZV -= add; Z -= add; ZV_BYTE -= add; Z_BYTE -= add;
 953     }
 954   else
 955     {
 956       Lisp_Object this_buffer;
 957
 958       this_buffer = Fcurrent_buffer ();
 959       set_buffer_internal (XBUFFER (coding->dst_object));
 960       make_gap (bytes);
 961       set_buffer_internal (XBUFFER (this_buffer));
 962     }
 963 }
 964
 965
 966 static unsigned char *
 967 alloc_destination (coding, nbytes, dst)
 968      struct coding_system *coding;
 969      EMACS_INT nbytes;
 970      unsigned char *dst;
 971 {
 972   EMACS_INT offset = dst - coding->destination;
 973
 974   if (BUFFERP (coding->dst_object))
 975     coding_alloc_by_making_gap (coding, nbytes);
 976   else
 977     coding_alloc_by_realloc (coding, nbytes);
 978   coding->result = CODING_RESULT_SUCCESS;
 979   coding_set_destination (coding);
 980   dst = coding->destination + offset;
 981   return dst;
 982 }
 983
 984 /** Macros for annotations.  */
 985
 986 /* Maximum length of annotation data (sum of annotations for
 987    composition and charset).  */
 988 #define MAX_ANNOTATION_LENGTH (5 + (MAX_COMPOSITION_COMPONENTS * 2) - 1 + 5)
 989
 990 /* An annotation data is stored in the array coding->charbuf in this
 991    format:
 992      [ -LENGTH ANNOTATION_MASK FROM TO ... ]
 993    LENGTH is the number of elements in the annotation.
 994    ANNOTATION_MASK is one of CODING_ANNOTATE_XXX_MASK.
 995    FROM and TO specify the range of text annotated.  They are relative
 996    to coding->src_pos (on encoding) or coding->dst_pos (on decoding).
 997
 998    The format of the following elements depend on ANNOTATION_MASK.
 999
1000    In the case of CODING_ANNOTATE_COMPOSITION_MASK, these elements
1001    follows:
1002      ... METHOD [ COMPOSITION-COMPONENTS ... ]
1003    METHOD is one of enum composition_method.
1004    Optionnal COMPOSITION-COMPONENTS are characters and composition
1005    rules.
1006
1007    In the case of CODING_ANNOTATE_CHARSET_MASK, one element CHARSET-ID
1008    follows.  */
1009
1010 #define ADD_ANNOTATION_DATA(buf, len, mask, from, to)   \
1011   do {                                                  \
1012     *(buf)++ = -(len);                                  \
1013     *(buf)++ = (mask);                                  \
1014     *(buf)++ = (from);                                  \
1015     *(buf)++ = (to);                                    \
1016     coding->annotated = 1;                              \
1017   } while (0);
1018
1019 #define ADD_COMPOSITION_DATA(buf, from, to, method)                           \
1020   do {                                                                        \
1021     ADD_ANNOTATION_DATA (buf, 5, CODING_ANNOTATE_COMPOSITION_MASK, from, to); \
1022     *buf++ = method;                                                          \
1023   } while (0)
1024
1025
1026 #define ADD_CHARSET_DATA(buf, from, to, id)                               \
1027   do {                                                                    \
1028     ADD_ANNOTATION_DATA (buf, 5, CODING_ANNOTATE_CHARSET_MASK, from, to); \
1029     *buf++ = id;                                                          \
1030   } while (0)
1031
1032 \f
1033 /*** 2. Emacs' internal format (emacs-utf-8) ***/
1034
1035
1036
1037 \f
1038 /*** 3. UTF-8 ***/
1039
1040 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
1041    Check if a text is encoded in UTF-8.  If it is, return 1, else
1042    return 0.  */
1043
1044 #define UTF_8_1_OCTET_P(c)         ((c) < 0x80)
1045 #define UTF_8_EXTRA_OCTET_P(c)     (((c) & 0xC0) == 0x80)
1046 #define UTF_8_2_OCTET_LEADING_P(c) (((c) & 0xE0) == 0xC0)
1047 #define UTF_8_3_OCTET_LEADING_P(c) (((c) & 0xF0) == 0xE0)
1048 #define UTF_8_4_OCTET_LEADING_P(c) (((c) & 0xF8) == 0xF0)
1049 #define UTF_8_5_OCTET_LEADING_P(c) (((c) & 0xFC) == 0xF8)
1050
1051 static int
1052 detect_coding_utf_8 (coding, detect_info)
1053      struct coding_system *coding;
1054      struct coding_detection_info *detect_info;
1055 {
1056   const unsigned char *src = coding->source, *src_base = src;
1057   const unsigned char *src_end = coding->source + coding->src_bytes;
1058   int multibytep = coding->src_multibyte;
1059   int consumed_chars = 0;
1060   int found = 0;
1061   int incomplete;
1062
1063   detect_info->checked |= CATEGORY_MASK_UTF_8;
1064   /* A coding system of this category is always ASCII compatible.  */
1065   src += coding->head_ascii;
1066
1067   while (1)
1068     {
1069       int c, c1, c2, c3, c4;
1070
1071       incomplete = 0;
1072       ONE_MORE_BYTE (c);
1073       if (UTF_8_1_OCTET_P (c))
1074         continue;
1075       incomplete = 1;
1076       ONE_MORE_BYTE (c1);
1077       if (! UTF_8_EXTRA_OCTET_P (c1))
1078         break;
1079       if (UTF_8_2_OCTET_LEADING_P (c))
1080         {
1081           found = CATEGORY_MASK_UTF_8;
1082           continue;
1083         }
1084       ONE_MORE_BYTE (c2);
1085       if (! UTF_8_EXTRA_OCTET_P (c2))
1086         break;
1087       if (UTF_8_3_OCTET_LEADING_P (c))
1088         {
1089           found = CATEGORY_MASK_UTF_8;
1090           continue;
1091         }
1092       ONE_MORE_BYTE (c3);
1093       if (! UTF_8_EXTRA_OCTET_P (c3))
1094         break;
1095       if (UTF_8_4_OCTET_LEADING_P (c))
1096         {
1097           found = CATEGORY_MASK_UTF_8;
1098           continue;
1099         }
1100       ONE_MORE_BYTE (c4);
1101       if (! UTF_8_EXTRA_OCTET_P (c4))
1102         break;
1103       if (UTF_8_5_OCTET_LEADING_P (c))
1104         {
1105           found = CATEGORY_MASK_UTF_8;
1106           continue;
1107         }
1108       break;
1109     }
1110   detect_info->rejected |= CATEGORY_MASK_UTF_8;
1111   return 0;
1112
1113  no_more_source:
1114   if (incomplete && coding->mode & CODING_MODE_LAST_BLOCK)
1115     {
1116       detect_info->rejected |= CATEGORY_MASK_UTF_8;
1117       return 0;
1118     }
1119   detect_info->found |= found;
1120   return 1;
1121 }
1122
1123
1124 static void
1125 decode_coding_utf_8 (coding)
1126      struct coding_system *coding;
1127 {
1128   const unsigned char *src = coding->source + coding->consumed;
1129   const unsigned char *src_end = coding->source + coding->src_bytes;
1130   const unsigned char *src_base;
1131   int *charbuf = coding->charbuf;
1132   int *charbuf_end = charbuf + coding->charbuf_size;
1133   int consumed_chars = 0, consumed_chars_base;
1134   int multibytep = coding->src_multibyte;
1135   Lisp_Object attr, eol_type, charset_list;
1136
1137   CODING_GET_INFO (coding, attr, eol_type, charset_list);
1138
1139   while (1)
1140     {
1141       int c, c1, c2, c3, c4, c5;
1142
1143       src_base = src;
1144       consumed_chars_base = consumed_chars;
1145
1146       if (charbuf >= charbuf_end)
1147         break;
1148
1149       ONE_MORE_BYTE (c1);
1150       if (UTF_8_1_OCTET_P(c1))
1151         {
1152           c = c1;
1153           if (c == '\r')
1154             {
1155               if (EQ (eol_type, Qdos))
1156                 {
1157                   if (src == src_end)
1158                     {
1159                       coding->result = CODING_RESULT_INSUFFICIENT_SRC;
1160                       goto no_more_source;
1161                     }
1162                   if (*src == '\n')
1163                     ONE_MORE_BYTE (c);
1164                 }
1165               else if (EQ (eol_type, Qmac))
1166                 c = '\n';
1167             }
1168         }
1169       else
1170         {
1171           ONE_MORE_BYTE (c2);
1172           if (! UTF_8_EXTRA_OCTET_P (c2))
1173             goto invalid_code;
1174           if (UTF_8_2_OCTET_LEADING_P (c1))
1175             {
1176               c = ((c1 & 0x1F) << 6) | (c2 & 0x3F);
1177               /* Reject overlong sequences here and below.  Encoders
1178                  producing them are incorrect, they can be misleading,
1179                  and they mess up read/write invariance.  */
1180               if (c < 128)
1181                 goto invalid_code;
1182             }
1183           else
1184             {
1185               ONE_MORE_BYTE (c3);
1186               if (! UTF_8_EXTRA_OCTET_P (c3))
1187                 goto invalid_code;
1188               if (UTF_8_3_OCTET_LEADING_P (c1))
1189                 {
1190                   c = (((c1 & 0xF) << 12)
1191                        | ((c2 & 0x3F) << 6) | (c3 & 0x3F));
1192                   if (c < 0x800
1193                       || (c >= 0xd800 && c < 0xe000)) /* surrogates (invalid) */
1194                     goto invalid_code;
1195                 }
1196               else
1197                 {
1198                   ONE_MORE_BYTE (c4);
1199                   if (! UTF_8_EXTRA_OCTET_P (c4))
1200                     goto invalid_code;
1201                   if (UTF_8_4_OCTET_LEADING_P (c1))
1202                     {
1203                     c = (((c1 & 0x7) << 18) | ((c2 & 0x3F) << 12)
1204                          | ((c3 & 0x3F) << 6) | (c4 & 0x3F));
1205                     if (c < 0x10000)
1206                       goto invalid_code;
1207                     }
1208                   else
1209                     {
1210                       ONE_MORE_BYTE (c5);
1211                       if (! UTF_8_EXTRA_OCTET_P (c5))
1212                         goto invalid_code;
1213                       if (UTF_8_5_OCTET_LEADING_P (c1))
1214                         {
1215                           c = (((c1 & 0x3) << 24) | ((c2 & 0x3F) << 18)
1216                                | ((c3 & 0x3F) << 12) | ((c4 & 0x3F) << 6)
1217                                | (c5 & 0x3F));
1218                           if ((c > MAX_CHAR) || (c < 0x200000))
1219                             goto invalid_code;
1220                         }
1221                       else
1222                         goto invalid_code;
1223                     }
1224                 }
1225             }
1226         }
1227
1228       *charbuf++ = c;
1229       continue;
1230
1231     invalid_code:
1232       src = src_base;
1233       consumed_chars = consumed_chars_base;
1234       ONE_MORE_BYTE (c);
1235       *charbuf++ = ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
1236       coding->errors++;
1237     }
1238
1239  no_more_source:
1240   coding->consumed_char += consumed_chars_base;
1241   coding->consumed = src_base - coding->source;
1242   coding->charbuf_used = charbuf - coding->charbuf;
1243 }
1244
1245
1246 static int
1247 encode_coding_utf_8 (coding)
1248      struct coding_system *coding;
1249 {
1250   int multibytep = coding->dst_multibyte;
1251   int *charbuf = coding->charbuf;
1252   int *charbuf_end = charbuf + coding->charbuf_used;
1253   unsigned char *dst = coding->destination + coding->produced;
1254   unsigned char *dst_end = coding->destination + coding->dst_bytes;
1255   int produced_chars = 0;
1256   int c;
1257
1258   if (multibytep)
1259     {
1260       int safe_room = MAX_MULTIBYTE_LENGTH * 2;
1261
1262       while (charbuf < charbuf_end)
1263         {
1264           unsigned char str[MAX_MULTIBYTE_LENGTH], *p, *pend = str;
1265
1266           ASSURE_DESTINATION (safe_room);
1267           c = *charbuf++;
1268           if (CHAR_BYTE8_P (c))
1269             {
1270               c = CHAR_TO_BYTE8 (c);
1271               EMIT_ONE_BYTE (c);
1272             }
1273           else
1274             {
1275               CHAR_STRING_ADVANCE (c, pend);
1276               for (p = str; p < pend; p++)
1277                 EMIT_ONE_BYTE (*p);
1278             }
1279         }
1280     }
1281   else
1282     {
1283       int safe_room = MAX_MULTIBYTE_LENGTH;
1284
1285       while (charbuf < charbuf_end)
1286         {
1287           ASSURE_DESTINATION (safe_room);
1288           c = *charbuf++;
1289           dst += CHAR_STRING (c, dst);
1290           produced_chars++;
1291         }
1292     }
1293   coding->result = CODING_RESULT_SUCCESS;
1294   coding->produced_char += produced_chars;
1295   coding->produced = dst - coding->destination;
1296   return 0;
1297 }
1298
1299
1300 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
1301    Check if a text is encoded in one of UTF-16 based coding systems.
1302    If it is, return 1, else return 0.  */
1303
1304 #define UTF_16_HIGH_SURROGATE_P(val) \
1305   (((val) & 0xFC00) == 0xD800)
1306
1307 #define UTF_16_LOW_SURROGATE_P(val) \
1308   (((val) & 0xFC00) == 0xDC00)
1309
1310 #define UTF_16_INVALID_P(val)   \
1311   (((val) == 0xFFFE)            \
1312    || ((val) == 0xFFFF)         \
1313    || UTF_16_LOW_SURROGATE_P (val))
1314
1315
1316 static int
1317 detect_coding_utf_16 (coding, detect_info)
1318      struct coding_system *coding;
1319      struct coding_detection_info *detect_info;
1320 {
1321   const unsigned char *src = coding->source, *src_base = src;
1322   const unsigned char *src_end = coding->source + coding->src_bytes;
1323   int multibytep = coding->src_multibyte;
1324   int consumed_chars = 0;
1325   int c1, c2;
1326
1327   detect_info->checked |= CATEGORY_MASK_UTF_16;
1328
1329   if (coding->mode & CODING_MODE_LAST_BLOCK
1330       && (coding->src_bytes & 1))
1331     {
1332       detect_info->rejected |= CATEGORY_MASK_UTF_16;
1333       return 0;
1334     }
1335   ONE_MORE_BYTE (c1);
1336   ONE_MORE_BYTE (c2);
1337
1338   if ((c1 == 0xFF) && (c2 == 0xFE))
1339     {
1340       detect_info->found |= (CATEGORY_MASK_UTF_16_LE
1341                              | CATEGORY_MASK_UTF_16_AUTO);
1342       detect_info->rejected |= CATEGORY_MASK_UTF_16_BE;
1343     }
1344   else if ((c1 == 0xFE) && (c2 == 0xFF))
1345     {
1346       detect_info->found |= (CATEGORY_MASK_UTF_16_BE
1347                              | CATEGORY_MASK_UTF_16_AUTO);
1348       detect_info->rejected |= CATEGORY_MASK_UTF_16_LE;
1349     }
1350  no_more_source:
1351   return 1;
1352 }
1353
1354 static void
1355 decode_coding_utf_16 (coding)
1356      struct coding_system *coding;
1357 {
1358   const unsigned char *src = coding->source + coding->consumed;
1359   const unsigned char *src_end = coding->source + coding->src_bytes;
1360   const unsigned char *src_base;
1361   int *charbuf = coding->charbuf;
1362   int *charbuf_end = charbuf + coding->charbuf_size;
1363   int consumed_chars = 0, consumed_chars_base;
1364   int multibytep = coding->src_multibyte;
1365   enum utf_16_bom_type bom = CODING_UTF_16_BOM (coding);
1366   enum utf_16_endian_type endian = CODING_UTF_16_ENDIAN (coding);
1367   int surrogate = CODING_UTF_16_SURROGATE (coding);
1368   Lisp_Object attr, eol_type, charset_list;
1369
1370   CODING_GET_INFO (coding, attr, eol_type, charset_list);
1371
1372   if (bom == utf_16_with_bom)
1373     {
1374       int c, c1, c2;
1375
1376       src_base = src;
1377       ONE_MORE_BYTE (c1);
1378       ONE_MORE_BYTE (c2);
1379       c = (c1 << 8) | c2;
1380
1381       if (endian == utf_16_big_endian
1382           ? c != 0xFEFF : c != 0xFFFE)
1383         {
1384           /* The first two bytes are not BOM.  Treat them as bytes
1385              for a normal character.  */
1386           src = src_base;
1387           coding->errors++;
1388         }
1389       CODING_UTF_16_BOM (coding) = utf_16_without_bom;
1390     }
1391   else if (bom == utf_16_detect_bom)
1392     {
1393       /* We have already tried to detect BOM and failed in
1394          detect_coding.  */
1395       CODING_UTF_16_BOM (coding) = utf_16_without_bom;
1396     }
1397
1398   while (1)
1399     {
1400       int c, c1, c2;
1401
1402       src_base = src;
1403       consumed_chars_base = consumed_chars;
1404
1405       if (charbuf + 2 >= charbuf_end)
1406         break;
1407
1408       ONE_MORE_BYTE (c1);
1409       ONE_MORE_BYTE (c2);
1410       c = (endian == utf_16_big_endian
1411            ? ((c1 << 8) | c2) : ((c2 << 8) | c1));
1412       if (surrogate)
1413         {
1414           if (! UTF_16_LOW_SURROGATE_P (c))
1415             {
1416               if (endian == utf_16_big_endian)
1417                 c1 = surrogate >> 8, c2 = surrogate & 0xFF;
1418               else
1419                 c1 = surrogate & 0xFF, c2 = surrogate >> 8;
1420               *charbuf++ = c1;
1421               *charbuf++ = c2;
1422               coding->errors++;
1423               if (UTF_16_HIGH_SURROGATE_P (c))
1424                 CODING_UTF_16_SURROGATE (coding) = surrogate = c;
1425               else
1426                 *charbuf++ = c;
1427             }
1428           else
1429             {
1430               c = ((surrogate - 0xD800) << 10) | (c - 0xDC00);
1431               CODING_UTF_16_SURROGATE (coding) = surrogate = 0;
1432               *charbuf++ = c;
1433             }
1434         }
1435       else
1436         {
1437           if (UTF_16_HIGH_SURROGATE_P (c))
1438             CODING_UTF_16_SURROGATE (coding) = surrogate = c;
1439           else
1440             *charbuf++ = c;
1441         }
1442     }
1443
1444  no_more_source:
1445   coding->consumed_char += consumed_chars_base;
1446   coding->consumed = src_base - coding->source;
1447   coding->charbuf_used = charbuf - coding->charbuf;
1448 }
1449
1450 static int
1451 encode_coding_utf_16 (coding)
1452      struct coding_system *coding;
1453 {
1454   int multibytep = coding->dst_multibyte;
1455   int *charbuf = coding->charbuf;
1456   int *charbuf_end = charbuf + coding->charbuf_used;
1457   unsigned char *dst = coding->destination + coding->produced;
1458   unsigned char *dst_end = coding->destination + coding->dst_bytes;
1459   int safe_room = 8;
1460   enum utf_16_bom_type bom = CODING_UTF_16_BOM (coding);
1461   int big_endian = CODING_UTF_16_ENDIAN (coding) == utf_16_big_endian;
1462   int produced_chars = 0;
1463   Lisp_Object attrs, eol_type, charset_list;
1464   int c;
1465
1466   CODING_GET_INFO (coding, attrs, eol_type, charset_list);
1467
1468   if (bom != utf_16_without_bom)
1469     {
1470       ASSURE_DESTINATION (safe_room);
1471       if (big_endian)
1472         EMIT_TWO_BYTES (0xFE, 0xFF);
1473       else
1474         EMIT_TWO_BYTES (0xFF, 0xFE);
1475       CODING_UTF_16_BOM (coding) = utf_16_without_bom;
1476     }
1477
1478   while (charbuf < charbuf_end)
1479     {
1480       ASSURE_DESTINATION (safe_room);
1481       c = *charbuf++;
1482       if (c >= MAX_UNICODE_CHAR)
1483         c = coding->default_char;
1484
1485       if (c < 0x10000)
1486         {
1487           if (big_endian)
1488             EMIT_TWO_BYTES (c >> 8, c & 0xFF);
1489           else
1490             EMIT_TWO_BYTES (c & 0xFF, c >> 8);
1491         }
1492       else
1493         {
1494           int c1, c2;
1495
1496           c -= 0x10000;
1497           c1 = (c >> 10) + 0xD800;
1498           c2 = (c & 0x3FF) + 0xDC00;
1499           if (big_endian)
1500             EMIT_FOUR_BYTES (c1 >> 8, c1 & 0xFF, c2 >> 8, c2 & 0xFF);
1501           else
1502             EMIT_FOUR_BYTES (c1 & 0xFF, c1 >> 8, c2 & 0xFF, c2 >> 8);
1503         }
1504     }
1505   coding->result = CODING_RESULT_SUCCESS;
1506   coding->produced = dst - coding->destination;
1507   coding->produced_char += produced_chars;
1508   return 0;
1509 }
1510
1511 \f
1512 /*** 6. Old Emacs' internal format (emacs-mule) ***/
1513
1514 /* Emacs' internal format for representation of multiple character
1515    sets is a kind of multi-byte encoding, i.e. characters are
1516    represented by variable-length sequences of one-byte codes.
1517
1518    ASCII characters and control characters (e.g. `tab', `newline') are
1519    represented by one-byte sequences which are their ASCII codes, in
1520    the range 0x00 through 0x7F.
1521
1522    8-bit characters of the range 0x80..0x9F are represented by
1523    two-byte sequences of LEADING_CODE_8_BIT_CONTROL and (their 8-bit
1524    code + 0x20).
1525
1526    8-bit characters of the range 0xA0..0xFF are represented by
1527    one-byte sequences which are their 8-bit code.
1528
1529    The other characters are represented by a sequence of `base
1530    leading-code', optional `extended leading-code', and one or two
1531    `position-code's.  The length of the sequence is determined by the
1532    base leading-code.  Leading-code takes the range 0x81 through 0x9D,
1533    whereas extended leading-code and position-code take the range 0xA0
1534    through 0xFF.  See `charset.h' for more details about leading-code
1535    and position-code.
1536
1537    --- CODE RANGE of Emacs' internal format ---
1538    character set        range
1539    -------------        -----
1540    ascii                0x00..0x7F
1541    eight-bit-control    LEADING_CODE_8_BIT_CONTROL + 0xA0..0xBF
1542    eight-bit-graphic    0xA0..0xBF
1543    ELSE                 0x81..0x9D + [0xA0..0xFF]+
1544    ---------------------------------------------
1545
1546    As this is the internal character representation, the format is
1547    usually not used externally (i.e. in a file or in a data sent to a
1548    process).  But, it is possible to have a text externally in this
1549    format (i.e. by encoding by the coding system `emacs-mule').
1550
1551    In that case, a sequence of one-byte codes has a slightly different
1552    form.
1553
1554    At first, all characters in eight-bit-control are represented by
1555    one-byte sequences which are their 8-bit code.
1556
1557    Next, character composition data are represented by the byte
1558    sequence of the form: 0x80 METHOD BYTES CHARS COMPONENT ...,
1559    where,
1560         METHOD is 0xF0 plus one of composition method (enum
1561         composition_method),
1562
1563         BYTES is 0xA0 plus a byte length of this composition data,
1564
1565         CHARS is 0x20 plus a number of characters composed by this
1566         data,
1567
1568         COMPONENTs are characters of multibye form or composition
1569         rules encoded by two-byte of ASCII codes.
1570
1571    In addition, for backward compatibility, the following formats are
1572    also recognized as composition data on decoding.
1573
1574    0x80 MSEQ ...
1575    0x80 0xFF MSEQ RULE MSEQ RULE ... MSEQ
1576
1577    Here,
1578         MSEQ is a multibyte form but in these special format:
1579           ASCII: 0xA0 ASCII_CODE+0x80,
1580           other: LEADING_CODE+0x20 FOLLOWING-BYTE ...,
1581         RULE is a one byte code of the range 0xA0..0xF0 that
1582         represents a composition rule.
1583   */
1584
1585 char emacs_mule_bytes[256];
1586
1587 int
1588 emacs_mule_char (coding, src, nbytes, nchars, id)
1589      struct coding_system *coding;
1590      unsigned char *src;
1591      int *nbytes, *nchars, *id;
1592 {
1593   const unsigned char *src_end = coding->source + coding->src_bytes;
1594   const unsigned char *src_base = src;
1595   int multibytep = coding->src_multibyte;
1596   struct charset *charset;
1597   unsigned code;
1598   int c;
1599   int consumed_chars = 0;
1600
1601   ONE_MORE_BYTE (c);
1602   switch (emacs_mule_bytes[c])
1603     {
1604     case 2:
1605       if (! (charset = emacs_mule_charset[c]))
1606         goto invalid_code;
1607       ONE_MORE_BYTE (c);
1608       code = c & 0x7F;
1609       break;
1610
1611     case 3:
1612       if (c == EMACS_MULE_LEADING_CODE_PRIVATE_11
1613           || c == EMACS_MULE_LEADING_CODE_PRIVATE_12)
1614         {
1615           ONE_MORE_BYTE (c);
1616           if (! (charset = emacs_mule_charset[c]))
1617             goto invalid_code;
1618           ONE_MORE_BYTE (c);
1619           code = c & 0x7F;
1620         }
1621       else
1622         {
1623           if (! (charset = emacs_mule_charset[c]))
1624             goto invalid_code;
1625           ONE_MORE_BYTE (c);
1626           code = (c & 0x7F) << 8;
1627           ONE_MORE_BYTE (c);
1628           code |= c & 0x7F;
1629         }
1630       break;
1631
1632     case 4:
1633       ONE_MORE_BYTE (c);
1634       if (! (charset = emacs_mule_charset[c]))
1635         goto invalid_code;
1636       ONE_MORE_BYTE (c);
1637       code = (c & 0x7F) << 8;
1638       ONE_MORE_BYTE (c);
1639       code |= c & 0x7F;
1640       break;
1641
1642     case 1:
1643       code = c;
1644       charset = CHARSET_FROM_ID (ASCII_BYTE_P (code)
1645                                  ? charset_ascii : charset_eight_bit);
1646       break;
1647
1648     default:
1649       abort ();
1650     }
1651   c = DECODE_CHAR (charset, code);
1652   if (c < 0)
1653     goto invalid_code;
1654   *nbytes = src - src_base;
1655   *nchars = consumed_chars;
1656   if (id)
1657     *id = charset->id;
1658   return c;
1659
1660  no_more_source:
1661   return -2;
1662
1663  invalid_code:
1664   return -1;
1665 }
1666
1667
1668 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
1669    Check if a text is encoded in `emacs-mule'.  If it is, return 1,
1670    else return 0.  */
1671
1672 static int
1673 detect_coding_emacs_mule (coding, detect_info)
1674      struct coding_system *coding;
1675      struct coding_detection_info *detect_info;
1676 {
1677   const unsigned char *src = coding->source, *src_base = src;
1678   const unsigned char *src_end = coding->source + coding->src_bytes;
1679   int multibytep = coding->src_multibyte;
1680   int consumed_chars = 0;
1681   int c;
1682   int found = 0;
1683   int incomplete;
1684
1685   detect_info->checked |= CATEGORY_MASK_EMACS_MULE;
1686   /* A coding system of this category is always ASCII compatible.  */
1687   src += coding->head_ascii;
1688
1689   while (1)
1690     {
1691       incomplete = 0;
1692       ONE_MORE_BYTE (c);
1693       incomplete = 1;
1694
1695       if (c == 0x80)
1696         {
1697           /* Perhaps the start of composite character.  We simple skip
1698              it because analyzing it is too heavy for detecting.  But,
1699              at least, we check that the composite character
1700              constitues of more than 4 bytes.  */
1701           const unsigned char *src_base;
1702
1703         repeat:
1704           src_base = src;
1705           do
1706             {
1707               ONE_MORE_BYTE (c);
1708             }
1709           while (c >= 0xA0);
1710
1711           if (src - src_base <= 4)
1712             break;
1713           found = CATEGORY_MASK_EMACS_MULE;
1714           if (c == 0x80)
1715             goto repeat;
1716         }
1717
1718       if (c < 0x80)
1719         {
1720           if (c < 0x20
1721               && (c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO))
1722             break;
1723         }
1724       else
1725         {
1726           const unsigned char *src_base = src - 1;
1727
1728           do
1729             {
1730               ONE_MORE_BYTE (c);
1731             }
1732           while (c >= 0xA0);
1733           if (src - src_base != emacs_mule_bytes[*src_base])
1734             break;
1735           found = CATEGORY_MASK_EMACS_MULE;
1736         }
1737     }
1738   detect_info->rejected |= CATEGORY_MASK_EMACS_MULE;
1739   return 0;
1740
1741  no_more_source:
1742   if (incomplete && coding->mode & CODING_MODE_LAST_BLOCK)
1743     {
1744       detect_info->rejected |= CATEGORY_MASK_EMACS_MULE;
1745       return 0;
1746     }
1747   detect_info->found |= found;
1748   return 1;
1749 }
1750
1751
1752 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".  */
1753
1754 /* Decode a character represented as a component of composition
1755    sequence of Emacs 20/21 style at SRC.  Set C to that character and
1756    update SRC to the head of next character (or an encoded composition
1757    rule).  If SRC doesn't points a composition component, set C to -1.
1758    If SRC points an invalid byte sequence, global exit by a return
1759    value 0.  */
1760
1761 #define DECODE_EMACS_MULE_COMPOSITION_CHAR(buf)                 \
1762   if (1)                                                        \
1763     {                                                           \
1764       int c;                                                    \
1765       int nbytes, nchars;                                       \
1766                                                                 \
1767       if (src == src_end)                                       \
1768         break;                                                  \
1769       c = emacs_mule_char (coding, src, &nbytes, &nchars, NULL);\
1770       if (c < 0)                                                \
1771         {                                                       \
1772           if (c == -2)                                          \
1773             break;                                              \
1774           goto invalid_code;                                    \
1775         }                                                       \
1776       *buf++ = c;                                               \
1777       src += nbytes;                                            \
1778       consumed_chars += nchars;                                 \
1779     }                                                           \
1780   else
1781
1782
1783 /* Decode a composition rule represented as a component of composition
1784    sequence of Emacs 20 style at SRC.  Store the decoded rule in *BUF,
1785    and increment BUF.  If SRC points an invalid byte sequence, set C
1786    to -1.  */
1787
1788 #define DECODE_EMACS_MULE_COMPOSITION_RULE_20(buf)      \
1789   do {                                                  \
1790     int c, gref, nref;                                  \
1791                                                         \
1792     if (src >= src_end)                                 \
1793       goto invalid_code;                                \
1794     ONE_MORE_BYTE_NO_CHECK (c);                         \
1795     c -= 0x20;                                          \
1796     if (c < 0 || c >= 81)                               \
1797       goto invalid_code;                                \
1798                                                         \
1799     gref = c / 9, nref = c % 9;                         \
1800     *buf++ = COMPOSITION_ENCODE_RULE (gref, nref);      \
1801   } while (0)
1802
1803
1804 /* Decode a composition rule represented as a component of composition
1805    sequence of Emacs 21 style at SRC.  Store the decoded rule in *BUF,
1806    and increment BUF.  If SRC points an invalid byte sequence, set C
1807    to -1.  */
1808
1809 #define DECODE_EMACS_MULE_COMPOSITION_RULE_21(buf)      \
1810   do {                                                  \
1811     int gref, nref;                                     \
1812                                                         \
1813     if (src + 1>= src_end)                              \
1814       goto invalid_code;                                \
1815     ONE_MORE_BYTE_NO_CHECK (gref);                      \
1816     gref -= 0x20;                                       \
1817     ONE_MORE_BYTE_NO_CHECK (nref);                      \
1818     nref -= 0x20;                                       \
1819     if (gref < 0 || gref >= 81                          \
1820         || nref < 0 || nref >= 81)                      \
1821       goto invalid_code;                                \
1822     *buf++ = COMPOSITION_ENCODE_RULE (gref, nref);      \
1823   } while (0)
1824
1825
1826 #define DECODE_EMACS_MULE_21_COMPOSITION(c)                             \
1827   do {                                                                  \
1828     /* Emacs 21 style format.  The first three bytes at SRC are         \
1829        (METHOD - 0xF2), (BYTES - 0xA0), (CHARS - 0xA0), where BYTES is  \
1830        the byte length of this composition information, CHARS is the    \
1831        number of characters composed by this composition.  */           \
1832     enum composition_method method = c - 0xF2;                          \
1833     int *charbuf_base = charbuf;                                        \
1834     int from, to;                                                       \
1835     int consumed_chars_limit;                                           \
1836     int nbytes, nchars;                                                 \
1837                                                                         \
1838     ONE_MORE_BYTE (c);                                                  \
1839     nbytes = c - 0xA0;                                                  \
1840     if (nbytes < 3)                                                     \
1841       goto invalid_code;                                                \
1842     ONE_MORE_BYTE (c);                                                  \
1843     nchars = c - 0xA0;                                                  \
1844     from = coding->produced + char_offset;                              \
1845     to = from + nchars;                                                 \
1846     ADD_COMPOSITION_DATA (charbuf, from, to, method);                   \
1847     consumed_chars_limit = consumed_chars_base + nbytes;                \
1848     if (method != COMPOSITION_RELATIVE)                                 \
1849       {                                                                 \
1850         int i = 0;                                                      \
1851         while (consumed_chars < consumed_chars_limit)                   \
1852           {                                                             \
1853             if (i % 2 && method != COMPOSITION_WITH_ALTCHARS)           \
1854               DECODE_EMACS_MULE_COMPOSITION_RULE_21 (charbuf);          \
1855             else                                                        \
1856               DECODE_EMACS_MULE_COMPOSITION_CHAR (charbuf);             \
1857             i++;                                                        \
1858           }                                                             \
1859         if (consumed_chars < consumed_chars_limit)                      \
1860           goto invalid_code;                                            \
1861         charbuf_base[0] -= i;                                           \
1862       }                                                                 \
1863   } while (0)
1864
1865
1866 #define DECODE_EMACS_MULE_20_RELATIVE_COMPOSITION(c)            \
1867   do {                                                          \
1868     /* Emacs 20 style format for relative composition.  */      \
1869     /* Store multibyte form of characters to be composed.  */   \
1870     enum composition_method method = COMPOSITION_RELATIVE;      \
1871     int components[MAX_COMPOSITION_COMPONENTS * 2 - 1];         \
1872     int *buf = components;                                      \
1873     int i, j;                                                   \
1874     int from, to;                                               \
1875                                                                 \
1876     src = src_base;                                             \
1877     ONE_MORE_BYTE (c);          /* skip 0x80 */                 \
1878     for (i = 0; i < MAX_COMPOSITION_COMPONENTS; i++)            \
1879       DECODE_EMACS_MULE_COMPOSITION_CHAR (buf);                 \
1880     if (i < 2)                                                  \
1881       goto invalid_code;                                        \
1882     from = coding->produced_char + char_offset;                 \
1883     to = from + i;                                              \
1884     ADD_COMPOSITION_DATA (charbuf, from, to, method);           \
1885     for (j = 0; j < i; j++)                                     \
1886       *charbuf++ = components[j];                               \
1887   } while (0)
1888
1889
1890 #define DECODE_EMACS_MULE_20_RULEBASE_COMPOSITION(c)            \
1891   do {                                                          \
1892     /* Emacs 20 style format for rule-base composition.  */     \
1893     /* Store multibyte form of characters to be composed.  */   \
1894     enum composition_method method = COMPOSITION_WITH_RULE;     \
1895     int components[MAX_COMPOSITION_COMPONENTS * 2 - 1];         \
1896     int *buf = components;                                      \
1897     int i, j;                                                   \
1898     int from, to;                                               \
1899                                                                 \
1900     DECODE_EMACS_MULE_COMPOSITION_CHAR (buf);                   \
1901     for (i = 0; i < MAX_COMPOSITION_COMPONENTS; i++)            \
1902       {                                                         \
1903         DECODE_EMACS_MULE_COMPOSITION_RULE_20 (buf);            \
1904         DECODE_EMACS_MULE_COMPOSITION_CHAR (buf);               \
1905       }                                                         \
1906     if (i < 1 || (buf - components) % 2 == 0)                   \
1907       goto invalid_code;                                        \
1908     if (charbuf + i + (i / 2) + 1 < charbuf_end)                \
1909       goto no_more_source;                                      \
1910     from = coding->produced_char + char_offset;                 \
1911     to = from + i;                                              \
1912     ADD_COMPOSITION_DATA (buf, from, to, method);               \
1913     for (j = 0; j < i; j++)                                     \
1914       *charbuf++ = components[j];                               \
1915     for (j = 0; j < i; j += 2)                                  \
1916       *charbuf++ = components[j];                               \
1917   } while (0)
1918
1919
1920 static void
1921 decode_coding_emacs_mule (coding)
1922      struct coding_system *coding;
1923 {
1924   const unsigned char *src = coding->source + coding->consumed;
1925   const unsigned char *src_end = coding->source + coding->src_bytes;
1926   const unsigned char *src_base;
1927   int *charbuf = coding->charbuf;
1928   int *charbuf_end = charbuf + coding->charbuf_size - MAX_ANNOTATION_LENGTH;
1929   int consumed_chars = 0, consumed_chars_base;
1930   int multibytep = coding->src_multibyte;
1931   Lisp_Object attrs, eol_type, charset_list;
1932   int char_offset = coding->produced_char;
1933   int last_offset = char_offset;
1934   int last_id = charset_ascii;
1935
1936   CODING_GET_INFO (coding, attrs, eol_type, charset_list);
1937
1938   while (1)
1939     {
1940       int c;
1941
1942       src_base = src;
1943       consumed_chars_base = consumed_chars;
1944
1945       if (charbuf >= charbuf_end)
1946         break;
1947
1948       ONE_MORE_BYTE (c);
1949
1950       if (c < 0x80)
1951         {
1952           if (c == '\r')
1953             {
1954               if (EQ (eol_type, Qdos))
1955                 {
1956                   if (src == src_end)
1957                     {
1958                       coding->result = CODING_RESULT_INSUFFICIENT_SRC;
1959                       goto no_more_source;
1960                     }
1961                   if (*src == '\n')
1962                     ONE_MORE_BYTE (c);
1963                 }
1964               else if (EQ (eol_type, Qmac))
1965                 c = '\n';
1966             }
1967           *charbuf++ = c;
1968           char_offset++;
1969         }
1970       else if (c == 0x80)
1971         {
1972           ONE_MORE_BYTE (c);
1973           if (c - 0xF2 >= COMPOSITION_RELATIVE
1974               && c - 0xF2 <= COMPOSITION_WITH_RULE_ALTCHARS)
1975             DECODE_EMACS_MULE_21_COMPOSITION (c);
1976           else if (c < 0xC0)
1977             DECODE_EMACS_MULE_20_RELATIVE_COMPOSITION (c);
1978           else if (c == 0xFF)
1979             DECODE_EMACS_MULE_20_RULEBASE_COMPOSITION (c);
1980           else
1981             goto invalid_code;
1982         }
1983       else if (c < 0xA0 && emacs_mule_bytes[c] > 1)
1984         {
1985           int nbytes, nchars;
1986           int id;
1987
1988           src = src_base;
1989           consumed_chars = consumed_chars_base;
1990           c = emacs_mule_char (coding, src, &nbytes, &nchars, &id);
1991           if (c < 0)
1992             {
1993               if (c == -2)
1994                 break;
1995               goto invalid_code;
1996             }
1997           if (last_id != id)
1998             {
1999               if (last_id != charset_ascii)
2000                 ADD_CHARSET_DATA (charbuf, last_offset, char_offset, last_id);
2001               last_id = id;
2002               last_offset = char_offset;
2003             }
2004           *charbuf++ = c;
2005           src += nbytes;
2006           consumed_chars += nchars;
2007           char_offset++;
2008         }
2009       continue;
2010
2011     invalid_code:
2012       src = src_base;
2013       consumed_chars = consumed_chars_base;
2014       ONE_MORE_BYTE (c);
2015       *charbuf++ = ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
2016       char_offset++;
2017       coding->errors++;
2018     }
2019
2020  no_more_source:
2021   if (last_id != charset_ascii)
2022     ADD_CHARSET_DATA (charbuf, last_offset, char_offset, last_id);
2023   coding->consumed_char += consumed_chars_base;
2024   coding->consumed = src_base - coding->source;
2025   coding->charbuf_used = charbuf - coding->charbuf;
2026 }
2027
2028
2029 #define EMACS_MULE_LEADING_CODES(id, codes)     \
2030   do {                                          \
2031     if (id < 0xA0)                              \
2032       codes[0] = id, codes[1] = 0;              \
2033     else if (id < 0xE0)                         \
2034       codes[0] = 0x9A, codes[1] = id;           \
2035     else if (id < 0xF0)                         \
2036       codes[0] = 0x9B, codes[1] = id;           \
2037     else if (id < 0xF5)                         \
2038       codes[0] = 0x9C, codes[1] = id;           \
2039     else                                        \
2040       codes[0] = 0x9D, codes[1] = id;           \
2041   } while (0);
2042
2043
2044 static int
2045 encode_coding_emacs_mule (coding)
2046      struct coding_system *coding;
2047 {
2048   int multibytep = coding->dst_multibyte;
2049   int *charbuf = coding->charbuf;
2050   int *charbuf_end = charbuf + coding->charbuf_used;
2051   unsigned char *dst = coding->destination + coding->produced;
2052   unsigned char *dst_end = coding->destination + coding->dst_bytes;
2053   int safe_room = 8;
2054   int produced_chars = 0;
2055   Lisp_Object attrs, eol_type, charset_list;
2056   int c;
2057   int preferred_charset_id = -1;
2058
2059   CODING_GET_INFO (coding, attrs, eol_type, charset_list);
2060
2061   while (charbuf < charbuf_end)
2062     {
2063       ASSURE_DESTINATION (safe_room);
2064       c = *charbuf++;
2065
2066       if (c < 0)
2067         {
2068           /* Handle an annotation.  */
2069           switch (*charbuf)
2070             {
2071             case CODING_ANNOTATE_COMPOSITION_MASK:
2072               /* Not yet implemented.  */
2073               break;
2074             case CODING_ANNOTATE_CHARSET_MASK:
2075               preferred_charset_id = charbuf[3];
2076               if (preferred_charset_id >= 0
2077                   && NILP (Fmemq (make_number (preferred_charset_id),
2078                                   charset_list)))
2079                 preferred_charset_id = -1;
2080               break;
2081             default:
2082               abort ();
2083             }
2084           charbuf += -c - 1;
2085           continue;
2086         }
2087
2088       if (ASCII_CHAR_P (c))
2089         EMIT_ONE_ASCII_BYTE (c);
2090       else if (CHAR_BYTE8_P (c))
2091         {
2092           c = CHAR_TO_BYTE8 (c);
2093           EMIT_ONE_BYTE (c);
2094         }
2095       else
2096         {
2097           struct charset *charset;
2098           unsigned code;
2099           int dimension;
2100           int emacs_mule_id;
2101           unsigned char leading_codes[2];
2102
2103           if (preferred_charset_id >= 0)
2104             {
2105               charset = CHARSET_FROM_ID (preferred_charset_id);
2106               if (! CHAR_CHARSET_P (c, charset))
2107                 charset = char_charset (c, charset_list, NULL);
2108             }
2109           else
2110             charset = char_charset (c, charset_list, &code);
2111           if (! charset)
2112             {
2113               c = coding->default_char;
2114               if (ASCII_CHAR_P (c))
2115                 {
2116                   EMIT_ONE_ASCII_BYTE (c);
2117                   continue;
2118                 }
2119               charset = char_charset (c, charset_list, &code);
2120             }
2121           dimension = CHARSET_DIMENSION (charset);
2122           emacs_mule_id = CHARSET_EMACS_MULE_ID (charset);
2123           EMACS_MULE_LEADING_CODES (emacs_mule_id, leading_codes);
2124           EMIT_ONE_BYTE (leading_codes[0]);
2125           if (leading_codes[1])
2126             EMIT_ONE_BYTE (leading_codes[1]);
2127           if (dimension == 1)
2128             EMIT_ONE_BYTE (code);
2129           else
2130             {
2131               EMIT_ONE_BYTE (code >> 8);
2132               EMIT_ONE_BYTE (code & 0xFF);
2133             }
2134         }
2135     }
2136   coding->result = CODING_RESULT_SUCCESS;
2137   coding->produced_char += produced_chars;
2138   coding->produced = dst - coding->destination;
2139   return 0;
2140 }
2141
2142 \f
2143 /*** 7. ISO2022 handlers ***/
2144
2145 /* The following note describes the coding system ISO2022 briefly.
2146    Since the intention of this note is to help understand the
2147    functions in this file, some parts are NOT ACCURATE or are OVERLY
2148    SIMPLIFIED.  For thorough understanding, please refer to the
2149    original document of ISO2022.  This is equivalent to the standard
2150    ECMA-35, obtainable from <URL:http://www.ecma.ch/> (*).
2151
2152    ISO2022 provides many mechanisms to encode several character sets
2153    in 7-bit and 8-bit environments.  For 7-bit environments, all text
2154    is encoded using bytes less than 128.  This may make the encoded
2155    text a little bit longer, but the text passes more easily through
2156    several types of gateway, some of which strip off the MSB (Most
2157    Significant Bit).
2158
2159    There are two kinds of character sets: control character sets and
2160    graphic character sets.  The former contain control characters such
2161    as `newline' and `escape' to provide control functions (control
2162    functions are also provided by escape sequences).  The latter
2163    contain graphic characters such as 'A' and '-'.  Emacs recognizes
2164    two control character sets and many graphic character sets.
2165
2166    Graphic character sets are classified into one of the following
2167    four classes, according to the number of bytes (DIMENSION) and
2168    number of characters in one dimension (CHARS) of the set:
2169    - DIMENSION1_CHARS94
2170    - DIMENSION1_CHARS96
2171    - DIMENSION2_CHARS94
2172    - DIMENSION2_CHARS96
2173
2174    In addition, each character set is assigned an identification tag,
2175    unique for each set, called the "final character" (denoted as <F>
2176    hereafter).  The <F> of each character set is decided by ECMA(*)
2177    when it is registered in ISO.  The code range of <F> is 0x30..0x7F
2178    (0x30..0x3F are for private use only).
2179
2180    Note (*): ECMA = European Computer Manufacturers Association
2181
2182    Here are examples of graphic character sets [NAME(<F>)]:
2183         o DIMENSION1_CHARS94 -- ASCII('B'), right-half-of-JISX0201('I'), ...
2184         o DIMENSION1_CHARS96 -- right-half-of-ISO8859-1('A'), ...
2185         o DIMENSION2_CHARS94 -- GB2312('A'), JISX0208('B'), ...
2186         o DIMENSION2_CHARS96 -- none for the moment
2187
2188    A code area (1 byte=8 bits) is divided into 4 areas, C0, GL, C1, and GR.
2189         C0 [0x00..0x1F] -- control character plane 0
2190         GL [0x20..0x7F] -- graphic character plane 0
2191         C1 [0x80..0x9F] -- control character plane 1
2192         GR [0xA0..0xFF] -- graphic character plane 1
2193
2194    A control character set is directly designated and invoked to C0 or
2195    C1 by an escape sequence.  The most common case is that:
2196    - ISO646's  control character set is designated/invoked to C0, and
2197    - ISO6429's control character set is designated/invoked to C1,
2198    and usually these designations/invocations are omitted in encoded
2199    text.  In a 7-bit environment, only C0 can be used, and a control
2200    character for C1 is encoded by an appropriate escape sequence to
2201    fit into the environment.  All control characters for C1 are
2202    defined to have corresponding escape sequences.
2203
2204    A graphic character set is at first designated to one of four
2205    graphic registers (G0 through G3), then these graphic registers are
2206    invoked to GL or GR.  These designations and invocations can be
2207    done independently.  The most common case is that G0 is invoked to
2208    GL, G1 is invoked to GR, and ASCII is designated to G0.  Usually
2209    these invocations and designations are omitted in encoded text.
2210    In a 7-bit environment, only GL can be used.
2211
2212    When a graphic character set of CHARS94 is invoked to GL, codes
2213    0x20 and 0x7F of the GL area work as control characters SPACE and
2214    DEL respectively, and codes 0xA0 and 0xFF of the GR area should not
2215    be used.
2216
2217    There are two ways of invocation: locking-shift and single-shift.
2218    With locking-shift, the invocation lasts until the next different
2219    invocation, whereas with single-shift, the invocation affects the
2220    following character only and doesn't affect the locking-shift
2221    state.  Invocations are done by the following control characters or
2222    escape sequences:
2223
2224    ----------------------------------------------------------------------
2225    abbrev  function                  cntrl escape seq   description
2226    ----------------------------------------------------------------------
2227    SI/LS0  (shift-in)                0x0F  none         invoke G0 into GL
2228    SO/LS1  (shift-out)               0x0E  none         invoke G1 into GL
2229    LS2     (locking-shift-2)         none  ESC 'n'      invoke G2 into GL
2230    LS3     (locking-shift-3)         none  ESC 'o'      invoke G3 into GL
2231    LS1R    (locking-shift-1 right)   none  ESC '~'      invoke G1 into GR (*)
2232    LS2R    (locking-shift-2 right)   none  ESC '}'      invoke G2 into GR (*)
2233    LS3R    (locking-shift 3 right)   none  ESC '|'      invoke G3 into GR (*)
2234    SS2     (single-shift-2)          0x8E  ESC 'N'      invoke G2 for one char
2235    SS3     (single-shift-3)          0x8F  ESC 'O'      invoke G3 for one char
2236    ----------------------------------------------------------------------
2237    (*) These are not used by any known coding system.
2238
2239    Control characters for these functions are defined by macros
2240    ISO_CODE_XXX in `coding.h'.
2241
2242    Designations are done by the following escape sequences:
2243    ----------------------------------------------------------------------
2244    escape sequence      description
2245    ----------------------------------------------------------------------
2246    ESC '(' <F>          designate DIMENSION1_CHARS94<F> to G0
2247    ESC ')' <F>          designate DIMENSION1_CHARS94<F> to G1
2248    ESC '*' <F>          designate DIMENSION1_CHARS94<F> to G2
2249    ESC '+' <F>          designate DIMENSION1_CHARS94<F> to G3
2250    ESC ',' <F>          designate DIMENSION1_CHARS96<F> to G0 (*)
2251    ESC '-' <F>          designate DIMENSION1_CHARS96<F> to G1
2252    ESC '.' <F>          designate DIMENSION1_CHARS96<F> to G2
2253    ESC '/' <F>          designate DIMENSION1_CHARS96<F> to G3
2254    ESC '$' '(' <F>      designate DIMENSION2_CHARS94<F> to G0 (**)
2255    ESC '$' ')' <F>      designate DIMENSION2_CHARS94<F> to G1
2256    ESC '$' '*' <F>      designate DIMENSION2_CHARS94<F> to G2
2257    ESC '$' '+' <F>      designate DIMENSION2_CHARS94<F> to G3
2258    ESC '$' ',' <F>      designate DIMENSION2_CHARS96<F> to G0 (*)
2259    ESC '$' '-' <F>      designate DIMENSION2_CHARS96<F> to G1
2260    ESC '$' '.' <F>      designate DIMENSION2_CHARS96<F> to G2
2261    ESC '$' '/' <F>      designate DIMENSION2_CHARS96<F> to G3
2262    ----------------------------------------------------------------------
2263
2264    In this list, "DIMENSION1_CHARS94<F>" means a graphic character set
2265    of dimension 1, chars 94, and final character <F>, etc...
2266
2267    Note (*): Although these designations are not allowed in ISO2022,
2268    Emacs accepts them on decoding, and produces them on encoding
2269    CHARS96 character sets in a coding system which is characterized as
2270    7-bit environment, non-locking-shift, and non-single-shift.
2271
2272    Note (**): If <F> is '@', 'A', or 'B', the intermediate character
2273    '(' must be omitted.  We refer to this as "short-form" hereafter.
2274
2275    Now you may notice that there are a lot of ways of encoding the
2276    same multilingual text in ISO2022.  Actually, there exist many
2277    coding systems such as Compound Text (used in X11's inter client
2278    communication, ISO-2022-JP (used in Japanese Internet), ISO-2022-KR
2279    (used in Korean Internet), EUC (Extended UNIX Code, used in Asian
2280    localized platforms), and all of these are variants of ISO2022.
2281
2282    In addition to the above, Emacs handles two more kinds of escape
2283    sequences: ISO6429's direction specification and Emacs' private
2284    sequence for specifying character composition.
2285
2286    ISO6429's direction specification takes the following form:
2287         o CSI ']'      -- end of the current direction
2288         o CSI '0' ']'  -- end of the current direction
2289         o CSI '1' ']'  -- start of left-to-right text
2290         o CSI '2' ']'  -- start of right-to-left text
2291    The control character CSI (0x9B: control sequence introducer) is
2292    abbreviated to the escape sequence ESC '[' in a 7-bit environment.
2293
2294    Character composition specification takes the following form:
2295         o ESC '0' -- start relative composition
2296         o ESC '1' -- end composition
2297         o ESC '2' -- start rule-base composition (*)
2298         o ESC '3' -- start relative composition with alternate chars  (**)
2299         o ESC '4' -- start rule-base composition with alternate chars  (**)
2300   Since these are not standard escape sequences of any ISO standard,
2301   the use of them with these meanings is restricted to Emacs only.
2302
2303   (*) This form is used only in Emacs 20.7 and older versions,
2304   but newer versions can safely decode it.
2305   (**) This form is used only in Emacs 21.1 and newer versions,
2306   and older versions can't decode it.
2307
2308   Here's a list of example usages of these composition escape
2309   sequences (categorized by `enum composition_method').
2310
2311   COMPOSITION_RELATIVE:
2312         ESC 0 CHAR [ CHAR ] ESC 1
2313   COMPOSITION_WITH_RULE:
2314         ESC 2 CHAR [ RULE CHAR ] ESC 1
2315   COMPOSITION_WITH_ALTCHARS:
2316         ESC 3 ALTCHAR [ ALTCHAR ] ESC 0 CHAR [ CHAR ] ESC 1
2317   COMPOSITION_WITH_RULE_ALTCHARS:
2318         ESC 4 ALTCHAR [ RULE ALTCHAR ] ESC 0 CHAR [ CHAR ] ESC 1 */
2319
2320 enum iso_code_class_type iso_code_class[256];
2321
2322 #define SAFE_CHARSET_P(coding, id)      \
2323   ((id) <= (coding)->max_charset_id     \
2324    && (coding)->safe_charsets[id] >= 0)
2325
2326
2327 #define SHIFT_OUT_OK(category)  \
2328   (CODING_ISO_INITIAL (&coding_categories[category], 1) >= 0)
2329
2330 static void
2331 setup_iso_safe_charsets (attrs)
2332      Lisp_Object attrs;
2333 {
2334   Lisp_Object charset_list, safe_charsets;
2335   Lisp_Object request;
2336   Lisp_Object reg_usage;
2337   Lisp_Object tail;
2338   int reg94, reg96;
2339   int flags = XINT (AREF (attrs, coding_attr_iso_flags));
2340   int max_charset_id;
2341
2342   charset_list = CODING_ATTR_CHARSET_LIST (attrs);
2343   if ((flags & CODING_ISO_FLAG_FULL_SUPPORT)
2344       && ! EQ (charset_list, Viso_2022_charset_list))
2345     {
2346       CODING_ATTR_CHARSET_LIST (attrs)
2347         = charset_list = Viso_2022_charset_list;
2348       ASET (attrs, coding_attr_safe_charsets, Qnil);
2349     }
2350
2351   if (STRINGP (AREF (attrs, coding_attr_safe_charsets)))
2352     return;
2353
2354   max_charset_id = 0;
2355   for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
2356     {
2357       int id = XINT (XCAR (tail));
2358       if (max_charset_id < id)
2359         max_charset_id = id;
2360     }
2361
2362   safe_charsets = Fmake_string (make_number (max_charset_id + 1),
2363                                 make_number (255));
2364   request = AREF (attrs, coding_attr_iso_request);
2365   reg_usage = AREF (attrs, coding_attr_iso_usage);
2366   reg94 = XINT (XCAR (reg_usage));
2367   reg96 = XINT (XCDR (reg_usage));
2368
2369   for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
2370     {
2371       Lisp_Object id;
2372       Lisp_Object reg;
2373       struct charset *charset;
2374
2375       id = XCAR (tail);
2376       charset = CHARSET_FROM_ID (XINT (id));
2377       reg = Fcdr (Fassq (id, request));
2378       if (! NILP (reg))
2379         SSET (safe_charsets, XINT (id), XINT (reg));
2380       else if (charset->iso_chars_96)
2381         {
2382           if (reg96 < 4)
2383             SSET (safe_charsets, XINT (id), reg96);
2384         }
2385       else
2386         {
2387           if (reg94 < 4)
2388             SSET (safe_charsets, XINT (id), reg94);
2389         }
2390     }
2391   ASET (attrs, coding_attr_safe_charsets, safe_charsets);
2392 }
2393
2394
2395 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
2396    Check if a text is encoded in one of ISO-2022 based codig systems.
2397    If it is, return 1, else return 0.  */
2398
2399 static int
2400 detect_coding_iso_2022 (coding, detect_info)
2401      struct coding_system *coding;
2402      struct coding_detection_info *detect_info;
2403 {
2404   const unsigned char *src = coding->source, *src_base = src;
2405   const unsigned char *src_end = coding->source + coding->src_bytes;
2406   int multibytep = coding->src_multibyte;
2407   int single_shifting = 0;
2408   int id;
2409   int c, c1;
2410   int consumed_chars = 0;
2411   int i;
2412   int rejected = 0;
2413   int found = 0;
2414
2415   detect_info->checked |= CATEGORY_MASK_ISO;
2416
2417   for (i = coding_category_iso_7; i <= coding_category_iso_8_else; i++)
2418     {
2419       struct coding_system *this = &(coding_categories[i]);
2420       Lisp_Object attrs, val;
2421
2422       attrs = CODING_ID_ATTRS (this->id);
2423       if (CODING_ISO_FLAGS (this) & CODING_ISO_FLAG_FULL_SUPPORT
2424           && ! EQ (CODING_ATTR_SAFE_CHARSETS (attrs), Viso_2022_charset_list))
2425         setup_iso_safe_charsets (attrs);
2426       val = CODING_ATTR_SAFE_CHARSETS (attrs);
2427       this->max_charset_id = SCHARS (val) - 1;
2428       this->safe_charsets = (char *) SDATA (val);
2429     }
2430
2431   /* A coding system of this category is always ASCII compatible.  */
2432   src += coding->head_ascii;
2433
2434   while (rejected != CATEGORY_MASK_ISO)
2435     {
2436       ONE_MORE_BYTE (c);
2437       switch (c)
2438         {
2439         case ISO_CODE_ESC:
2440           if (inhibit_iso_escape_detection)
2441             break;
2442           single_shifting = 0;
2443           ONE_MORE_BYTE (c);
2444           if (c >= '(' && c <= '/')
2445             {
2446               /* Designation sequence for a charset of dimension 1.  */
2447               ONE_MORE_BYTE (c1);
2448               if (c1 < ' ' || c1 >= 0x80
2449                   || (id = iso_charset_table[0][c >= ','][c1]) < 0)
2450                 /* Invalid designation sequence.  Just ignore.  */
2451                 break;
2452             }
2453           else if (c == '$')
2454             {
2455               /* Designation sequence for a charset of dimension 2.  */
2456               ONE_MORE_BYTE (c);
2457               if (c >= '@' && c <= 'B')
2458                 /* Designation for JISX0208.1978, GB2312, or JISX0208.  */
2459                 id = iso_charset_table[1][0][c];
2460               else if (c >= '(' && c <= '/')
2461                 {
2462                   ONE_MORE_BYTE (c1);
2463                   if (c1 < ' ' || c1 >= 0x80
2464                       || (id = iso_charset_table[1][c >= ','][c1]) < 0)
2465                     /* Invalid designation sequence.  Just ignore.  */
2466                     break;
2467                 }
2468               else
2469                 /* Invalid designation sequence.  Just ignore it.  */
2470                 break;
2471             }
2472           else if (c == 'N' || c == 'O')
2473             {
2474               /* ESC <Fe> for SS2 or SS3.  */
2475               single_shifting = 1;
2476               rejected |= CATEGORY_MASK_ISO_7BIT | CATEGORY_MASK_ISO_8BIT;
2477               break;
2478             }
2479           else if (c >= '0' && c <= '4')
2480             {
2481               /* ESC <Fp> for start/end composition.  */
2482               found |= CATEGORY_MASK_ISO;
2483               break;
2484             }
2485           else
2486             {
2487               /* Invalid escape sequence.  Just ignore it.  */
2488               break;
2489             }
2490
2491           /* We found a valid designation sequence for CHARSET.  */
2492           rejected |= CATEGORY_MASK_ISO_8BIT;
2493           if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_7],
2494                               id))
2495             found |= CATEGORY_MASK_ISO_7;
2496           else
2497             rejected |= CATEGORY_MASK_ISO_7;
2498           if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_7_tight],
2499                               id))
2500             found |= CATEGORY_MASK_ISO_7_TIGHT;
2501           else
2502             rejected |= CATEGORY_MASK_ISO_7_TIGHT;
2503           if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_7_else],
2504                               id))
2505             found |= CATEGORY_MASK_ISO_7_ELSE;
2506           else
2507             rejected |= CATEGORY_MASK_ISO_7_ELSE;
2508           if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_8_else],
2509                               id))
2510             found |= CATEGORY_MASK_ISO_8_ELSE;
2511           else
2512             rejected |= CATEGORY_MASK_ISO_8_ELSE;
2513           break;
2514
2515         case ISO_CODE_SO:
2516         case ISO_CODE_SI:
2517           /* Locking shift out/in.  */
2518           if (inhibit_iso_escape_detection)
2519             break;
2520           single_shifting = 0;
2521           rejected |= CATEGORY_MASK_ISO_7BIT | CATEGORY_MASK_ISO_8BIT;
2522           found |= CATEGORY_MASK_ISO_ELSE;
2523           break;
2524
2525         case ISO_CODE_CSI:
2526           /* Control sequence introducer.  */
2527           single_shifting = 0;
2528           rejected |= CATEGORY_MASK_ISO_7BIT | CATEGORY_MASK_ISO_7_ELSE;
2529           found |= CATEGORY_MASK_ISO_8_ELSE;
2530           goto check_extra_latin;
2531
2532
2533         case ISO_CODE_SS2:
2534         case ISO_CODE_SS3:
2535           /* Single shift.   */
2536           if (inhibit_iso_escape_detection)
2537             break;
2538           single_shifting = 1;
2539           rejected |= CATEGORY_MASK_ISO_7BIT;
2540           if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_1])
2541               & CODING_ISO_FLAG_SINGLE_SHIFT)
2542             found |= CATEGORY_MASK_ISO_8_1;
2543           if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_2])
2544               & CODING_ISO_FLAG_SINGLE_SHIFT)
2545             found |= CATEGORY_MASK_ISO_8_2;
2546           goto check_extra_latin;
2547
2548         default:
2549           if (c < 0x80)
2550             {
2551               single_shifting = 0;
2552               break;
2553             }
2554           if (c >= 0xA0)
2555             {
2556               rejected |= CATEGORY_MASK_ISO_7BIT | CATEGORY_MASK_ISO_7_ELSE;
2557               found |= CATEGORY_MASK_ISO_8_1;
2558               /* Check the length of succeeding codes of the range
2559                  0xA0..0FF.  If the byte length is even, we include
2560                  CATEGORY_MASK_ISO_8_2 in `found'.  We can check this
2561                  only when we are not single shifting.  */
2562               if (! single_shifting
2563                   && ! (rejected & CATEGORY_MASK_ISO_8_2))
2564                 {
2565                   int i = 1;
2566                   while (src < src_end)
2567                     {
2568                       ONE_MORE_BYTE (c);
2569                       if (c < 0xA0)
2570                         break;
2571                       i++;
2572                     }
2573
2574                   if (i & 1 && src < src_end)
2575                     rejected |= CATEGORY_MASK_ISO_8_2;
2576                   else
2577                     found |= CATEGORY_MASK_ISO_8_2;
2578                 }
2579               break;
2580             }
2581         check_extra_latin:
2582           single_shifting = 0;
2583           if (! VECTORP (Vlatin_extra_code_table)
2584               || NILP (XVECTOR (Vlatin_extra_code_table)->contents[c]))
2585             {
2586               rejected = CATEGORY_MASK_ISO;
2587               break;
2588             }
2589           if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_1])
2590               & CODING_ISO_FLAG_LATIN_EXTRA)
2591             found |= CATEGORY_MASK_ISO_8_1;
2592           else
2593             rejected |= CATEGORY_MASK_ISO_8_1;
2594           if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_2])
2595               & CODING_ISO_FLAG_LATIN_EXTRA)
2596             found |= CATEGORY_MASK_ISO_8_2;
2597           else
2598             rejected |= CATEGORY_MASK_ISO_8_2;
2599         }
2600     }
2601   detect_info->rejected |= CATEGORY_MASK_ISO;
2602   return 0;
2603
2604  no_more_source:
2605   detect_info->rejected |= rejected;
2606   detect_info->found |= (found & ~rejected);
2607   return 1;
2608 }
2609
2610
2611 /* Set designation state into CODING.  */
2612 #define DECODE_DESIGNATION(reg, dim, chars_96, final)                   \
2613   do {                                                                  \
2614     int id, prev;                                                       \
2615                                                                         \
2616     if (final < '0' || final >= 128                                     \
2617         || ((id = ISO_CHARSET_TABLE (dim, chars_96, final)) < 0)        \
2618         || !SAFE_CHARSET_P (coding, id))                                \
2619       {                                                                 \
2620         CODING_ISO_DESIGNATION (coding, reg) = -2;                      \
2621         goto invalid_code;                                              \
2622       }                                                                 \
2623     prev = CODING_ISO_DESIGNATION (coding, reg);                        \
2624     if (id == charset_jisx0201_roman)                                   \
2625       {                                                                 \
2626         if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_USE_ROMAN)      \
2627           id = charset_ascii;                                           \
2628       }                                                                 \
2629     else if (id == charset_jisx0208_1978)                               \
2630       {                                                                 \
2631         if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_USE_OLDJIS)     \
2632           id = charset_jisx0208;                                        \
2633       }                                                                 \
2634     CODING_ISO_DESIGNATION (coding, reg) = id;                          \
2635     /* If there was an invalid designation to REG previously, and this  \
2636        designation is ASCII to REG, we should keep this designation     \
2637        sequence.  */                                                    \
2638     if (prev == -2 && id == charset_ascii)                              \
2639       goto invalid_code;                                                \
2640   } while (0)
2641
2642
2643 #define MAYBE_FINISH_COMPOSITION()                              \
2644   do {                                                          \
2645     int i;                                                      \
2646     if (composition_state == COMPOSING_NO)                      \
2647       break;                                                    \
2648     /* It is assured that we have enough room for producing     \
2649        characters stored in the table `components'.  */         \
2650     if (charbuf + component_idx > charbuf_end)                  \
2651       goto no_more_source;                                      \
2652     composition_state = COMPOSING_NO;                           \
2653     if (method == COMPOSITION_RELATIVE                          \
2654         || method == COMPOSITION_WITH_ALTCHARS)                 \
2655       {                                                         \
2656         for (i = 0; i < component_idx; i++)                     \
2657           *charbuf++ = components[i];                           \
2658         char_offset += component_idx;                           \
2659       }                                                         \
2660     else                                                        \
2661       {                                                         \
2662         for (i = 0; i < component_idx; i += 2)                  \
2663           *charbuf++ = components[i];                           \
2664         char_offset += (component_idx / 2) + 1;                 \
2665       }                                                         \
2666   } while (0)
2667
2668
2669 /* Handle composition start sequence ESC 0, ESC 2, ESC 3, or ESC 4.
2670    ESC 0 : relative composition : ESC 0 CHAR ... ESC 1
2671    ESC 2 : rulebase composition : ESC 2 CHAR RULE CHAR RULE ... CHAR ESC 1
2672    ESC 3 : altchar composition :  ESC 3 CHAR ... ESC 0 CHAR ... ESC 1
2673    ESC 4 : alt&rule composition : ESC 4 CHAR RULE ... CHAR ESC 0 CHAR ... ESC 1
2674   */
2675
2676 #define DECODE_COMPOSITION_START(c1)                                    \
2677   do {                                                                  \
2678     if (c1 == '0'                                                       \
2679         && composition_state == COMPOSING_COMPONENT_RULE)               \
2680       {                                                                 \
2681         component_len = component_idx;                                  \
2682         composition_state = COMPOSING_CHAR;                             \
2683       }                                                                 \
2684     else                                                                \
2685       {                                                                 \
2686         const unsigned char *p;                                         \
2687                                                                         \
2688         MAYBE_FINISH_COMPOSITION ();                                    \
2689         if (charbuf + MAX_COMPOSITION_COMPONENTS > charbuf_end)         \
2690           goto no_more_source;                                          \
2691         for (p = src; p < src_end - 1; p++)                             \
2692           if (*p == ISO_CODE_ESC && p[1] == '1')                        \
2693             break;                                                      \
2694         if (p == src_end - 1)                                           \
2695           {                                                             \
2696             if (coding->mode & CODING_MODE_LAST_BLOCK)                  \
2697               goto invalid_code;                                        \
2698             goto no_more_source;                                        \
2699           }                                                             \
2700                                                                         \
2701         /* This is surely the start of a composition.  */               \
2702         method = (c1 == '0' ? COMPOSITION_RELATIVE                      \
2703                   : c1 == '2' ? COMPOSITION_WITH_RULE                   \
2704                   : c1 == '3' ? COMPOSITION_WITH_ALTCHARS               \
2705                   : COMPOSITION_WITH_RULE_ALTCHARS);                    \
2706         composition_state = (c1 <= '2' ? COMPOSING_CHAR                 \
2707                              : COMPOSING_COMPONENT_CHAR);               \
2708         component_idx = component_len = 0;                              \
2709       }                                                                 \
2710   } while (0)
2711
2712
2713 /* Handle compositoin end sequence ESC 1.  */
2714
2715 #define DECODE_COMPOSITION_END()                                        \
2716   do {                                                                  \
2717     int nchars = (component_len > 0 ? component_idx - component_len     \
2718                   : method == COMPOSITION_RELATIVE ? component_idx      \
2719                   : (component_idx + 1) / 2);                           \
2720     int i;                                                              \
2721     int *saved_charbuf = charbuf;                                       \
2722     int from = char_offset;                                             \
2723     int to = from + nchars;                                             \
2724                                                                         \
2725     ADD_COMPOSITION_DATA (charbuf, from, to, method);                   \
2726     if (method != COMPOSITION_RELATIVE)                                 \
2727       {                                                                 \
2728         if (component_len == 0)                                         \
2729           for (i = 0; i < component_idx; i++)                           \
2730             *charbuf++ = components[i];                                 \
2731         else                                                            \
2732           for (i = 0; i < component_len; i++)                           \
2733             *charbuf++ = components[i];                                 \
2734         *saved_charbuf = saved_charbuf - charbuf;                       \
2735       }                                                                 \
2736     if (method == COMPOSITION_WITH_RULE)                                \
2737       for (i = 0; i < component_idx; i += 2, char_offset++)             \
2738         *charbuf++ = components[i];                                     \
2739     else                                                                \
2740       for (i = component_len; i < component_idx; i++, char_offset++)    \
2741         *charbuf++ = components[i];                                     \
2742     coding->annotated = 1;                                              \
2743     composition_state = COMPOSING_NO;                                   \
2744   } while (0)
2745
2746
2747 /* Decode a composition rule from the byte C1 (and maybe one more byte
2748    from SRC) and store one encoded composition rule in
2749    coding->cmp_data.  */
2750
2751 #define DECODE_COMPOSITION_RULE(c1)                                     \
2752   do {                                                                  \
2753     (c1) -= 32;                                                         \
2754     if (c1 < 81)                /* old format (before ver.21) */        \
2755       {                                                                 \
2756         int gref = (c1) / 9;                                            \
2757         int nref = (c1) % 9;                                            \
2758         if (gref == 4) gref = 10;                                       \
2759         if (nref == 4) nref = 10;                                       \
2760         c1 = COMPOSITION_ENCODE_RULE (gref, nref);                      \
2761       }                                                                 \
2762     else if (c1 < 93)           /* new format (after ver.21) */         \
2763       {                                                                 \
2764         ONE_MORE_BYTE (c2);                                             \
2765         c1 = COMPOSITION_ENCODE_RULE (c1 - 81, c2 - 32);                \
2766       }                                                                 \
2767     else                                                                \
2768       c1 = 0;                                                           \
2769   } while (0)
2770
2771
2772 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".  */
2773
2774 static void
2775 decode_coding_iso_2022 (coding)
2776      struct coding_system *coding;
2777 {
2778   const unsigned char *src = coding->source + coding->consumed;
2779   const unsigned char *src_end = coding->source + coding->src_bytes;
2780   const unsigned char *src_base;
2781   int *charbuf = coding->charbuf;
2782   int *charbuf_end
2783     = charbuf + coding->charbuf_size - 4 - MAX_ANNOTATION_LENGTH;
2784   int consumed_chars = 0, consumed_chars_base;
2785   int multibytep = coding->src_multibyte;
2786   /* Charsets invoked to graphic plane 0 and 1 respectively.  */
2787   int charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
2788   int charset_id_1 = CODING_ISO_INVOKED_CHARSET (coding, 1);
2789   struct charset *charset;
2790   int c;
2791   /* For handling composition sequence.  */
2792 #define COMPOSING_NO                    0
2793 #define COMPOSING_CHAR                  1
2794 #define COMPOSING_RULE                  2
2795 #define COMPOSING_COMPONENT_CHAR        3
2796 #define COMPOSING_COMPONENT_RULE        4
2797
2798   int composition_state = COMPOSING_NO;
2799   enum composition_method method;
2800   int components[MAX_COMPOSITION_COMPONENTS * 2 + 1];
2801   int component_idx;
2802   int component_len;
2803   Lisp_Object attrs, eol_type, charset_list;
2804   int char_offset = coding->produced_char;
2805   int last_offset = char_offset;
2806   int last_id = charset_ascii;
2807
2808   CODING_GET_INFO (coding, attrs, eol_type, charset_list);
2809   setup_iso_safe_charsets (attrs);
2810
2811   while (1)
2812     {
2813       int c1, c2;
2814
2815       src_base = src;
2816       consumed_chars_base = consumed_chars;
2817
2818       if (charbuf >= charbuf_end)
2819         break;
2820
2821       ONE_MORE_BYTE (c1);
2822
2823       /* We produce at most one character.  */
2824       switch (iso_code_class [c1])
2825         {
2826         case ISO_0x20_or_0x7F:
2827           if (composition_state != COMPOSING_NO)
2828             {
2829               if (composition_state == COMPOSING_RULE
2830                   || composition_state == COMPOSING_COMPONENT_RULE)
2831                 {
2832                   DECODE_COMPOSITION_RULE (c1);
2833                   components[component_idx++] = c1;
2834                   composition_state--;
2835                   continue;
2836                 }
2837             }
2838           if (charset_id_0 < 0
2839               || ! CHARSET_ISO_CHARS_96 (CHARSET_FROM_ID (charset_id_0)))
2840             /* This is SPACE or DEL.  */
2841             charset = CHARSET_FROM_ID (charset_ascii);
2842           else
2843             charset = CHARSET_FROM_ID (charset_id_0);
2844           break;
2845
2846         case ISO_graphic_plane_0:
2847           if (composition_state != COMPOSING_NO)
2848             {
2849               if (composition_state == COMPOSING_RULE
2850                   || composition_state == COMPOSING_COMPONENT_RULE)
2851                 {
2852                   DECODE_COMPOSITION_RULE (c1);
2853                   components[component_idx++] = c1;
2854                   composition_state--;
2855                   continue;
2856                 }
2857             }
2858           charset = CHARSET_FROM_ID (charset_id_0);
2859           break;
2860
2861         case ISO_0xA0_or_0xFF:
2862           if (charset_id_1 < 0
2863               || ! CHARSET_ISO_CHARS_96 (CHARSET_FROM_ID (charset_id_1))
2864               || CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS)
2865             goto invalid_code;
2866           /* This is a graphic character, we fall down ... */
2867
2868         case ISO_graphic_plane_1:
2869           if (charset_id_1 < 0)
2870             goto invalid_code;
2871           charset = CHARSET_FROM_ID (charset_id_1);
2872           break;
2873
2874         case ISO_carriage_return:
2875           if (c1 == '\r')
2876             {
2877               if (EQ (eol_type, Qdos))
2878                 {
2879                   if (src == src_end)
2880                     {
2881                       coding->result = CODING_RESULT_INSUFFICIENT_SRC;
2882                       goto no_more_source;
2883                     }
2884                   if (*src == '\n')
2885                     ONE_MORE_BYTE (c1);
2886                 }
2887               else if (EQ (eol_type, Qmac))
2888                 c1 = '\n';
2889             }
2890           /* fall through */
2891
2892         case ISO_control_0:
2893           MAYBE_FINISH_COMPOSITION ();
2894           charset = CHARSET_FROM_ID (charset_ascii);
2895           break;
2896
2897         case ISO_control_1:
2898           MAYBE_FINISH_COMPOSITION ();
2899           goto invalid_code;
2900
2901         case ISO_shift_out:
2902           if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LOCKING_SHIFT)
2903               || CODING_ISO_DESIGNATION (coding, 1) < 0)
2904             goto invalid_code;
2905           CODING_ISO_INVOCATION (coding, 0) = 1;
2906           charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
2907           continue;
2908
2909         case ISO_shift_in:
2910           if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LOCKING_SHIFT))
2911             goto invalid_code;
2912           CODING_ISO_INVOCATION (coding, 0) = 0;
2913           charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
2914           continue;
2915
2916         case ISO_single_shift_2_7:
2917         case ISO_single_shift_2:
2918           if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT))
2919             goto invalid_code;
2920           /* SS2 is handled as an escape sequence of ESC 'N' */
2921           c1 = 'N';
2922           goto label_escape_sequence;
2923
2924         case ISO_single_shift_3:
2925           if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT))
2926             goto invalid_code;
2927           /* SS2 is handled as an escape sequence of ESC 'O' */
2928           c1 = 'O';
2929           goto label_escape_sequence;
2930
2931         case ISO_control_sequence_introducer:
2932           /* CSI is handled as an escape sequence of ESC '[' ...  */
2933           c1 = '[';
2934           goto label_escape_sequence;
2935
2936         case ISO_escape:
2937           ONE_MORE_BYTE (c1);
2938         label_escape_sequence:
2939           /* Escape sequences handled here are invocation,
2940              designation, direction specification, and character
2941              composition specification.  */
2942           switch (c1)
2943             {
2944             case '&':           /* revision of following character set */
2945               ONE_MORE_BYTE (c1);
2946               if (!(c1 >= '@' && c1 <= '~'))
2947                 goto invalid_code;
2948               ONE_MORE_BYTE (c1);
2949               if (c1 != ISO_CODE_ESC)
2950                 goto invalid_code;
2951               ONE_MORE_BYTE (c1);
2952               goto label_escape_sequence;
2953
2954             case '$':           /* designation of 2-byte character set */
2955               if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DESIGNATION))
2956                 goto invalid_code;
2957               ONE_MORE_BYTE (c1);
2958               if (c1 >= '@' && c1 <= 'B')
2959                 {       /* designation of JISX0208.1978, GB2312.1980,
2960                            or JISX0208.1980 */
2961                   DECODE_DESIGNATION (0, 2, 0, c1);
2962                 }
2963               else if (c1 >= 0x28 && c1 <= 0x2B)
2964                 {       /* designation of DIMENSION2_CHARS94 character set */
2965                   ONE_MORE_BYTE (c2);
2966                   DECODE_DESIGNATION (c1 - 0x28, 2, 0, c2);
2967                 }
2968               else if (c1 >= 0x2C && c1 <= 0x2F)
2969                 {       /* designation of DIMENSION2_CHARS96 character set */
2970                   ONE_MORE_BYTE (c2);
2971                   DECODE_DESIGNATION (c1 - 0x2C, 2, 1, c2);
2972                 }
2973               else
2974                 goto invalid_code;
2975               /* We must update these variables now.  */
2976               charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
2977               charset_id_1 = CODING_ISO_INVOKED_CHARSET (coding, 1);
2978               continue;
2979
2980             case 'n':           /* invocation of locking-shift-2 */
2981               if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LOCKING_SHIFT)
2982                   || CODING_ISO_DESIGNATION (coding, 2) < 0)
2983                 goto invalid_code;
2984               CODING_ISO_INVOCATION (coding, 0) = 2;
2985               charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
2986               continue;
2987
2988             case 'o':           /* invocation of locking-shift-3 */
2989               if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LOCKING_SHIFT)
2990                   || CODING_ISO_DESIGNATION (coding, 3) < 0)
2991                 goto invalid_code;
2992               CODING_ISO_INVOCATION (coding, 0) = 3;
2993               charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
2994               continue;
2995
2996             case 'N':           /* invocation of single-shift-2 */
2997               if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT)
2998                   || CODING_ISO_DESIGNATION (coding, 2) < 0)
2999                 goto invalid_code;
3000               charset = CHARSET_FROM_ID (CODING_ISO_DESIGNATION (coding, 2));
3001               ONE_MORE_BYTE (c1);
3002               if (c1 < 0x20 || (c1 >= 0x80 && c1 < 0xA0))
3003                 goto invalid_code;
3004               break;
3005
3006             case 'O':           /* invocation of single-shift-3 */
3007               if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT)
3008                   || CODING_ISO_DESIGNATION (coding, 3) < 0)
3009                 goto invalid_code;
3010               charset = CHARSET_FROM_ID (CODING_ISO_DESIGNATION (coding, 3));
3011               ONE_MORE_BYTE (c1);
3012               if (c1 < 0x20 || (c1 >= 0x80 && c1 < 0xA0))
3013                 goto invalid_code;
3014               break;
3015
3016             case '0': case '2': case '3': case '4': /* start composition */
3017               if (! (coding->common_flags & CODING_ANNOTATE_COMPOSITION_MASK))
3018                 goto invalid_code;
3019               DECODE_COMPOSITION_START (c1);
3020               continue;
3021
3022             case '1':           /* end composition */
3023               if (composition_state == COMPOSING_NO)
3024                 goto invalid_code;
3025               DECODE_COMPOSITION_END ();
3026               continue;
3027
3028             case '[':           /* specification of direction */
3029               if (! CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DIRECTION)
3030                 goto invalid_code;
3031               /* For the moment, nested direction is not supported.
3032                  So, `coding->mode & CODING_MODE_DIRECTION' zero means
3033                  left-to-right, and nozero means right-to-left.  */
3034               ONE_MORE_BYTE (c1);
3035               switch (c1)
3036                 {
3037                 case ']':       /* end of the current direction */
3038                   coding->mode &= ~CODING_MODE_DIRECTION;
3039
3040                 case '0':       /* end of the current direction */
3041                 case '1':       /* start of left-to-right direction */
3042                   ONE_MORE_BYTE (c1);
3043                   if (c1 == ']')
3044                     coding->mode &= ~CODING_MODE_DIRECTION;
3045                   else
3046                     goto invalid_code;
3047                   break;
3048
3049                 case '2':       /* start of right-to-left direction */
3050                   ONE_MORE_BYTE (c1);
3051                   if (c1 == ']')
3052                     coding->mode |= CODING_MODE_DIRECTION;
3053                   else
3054                     goto invalid_code;
3055                   break;
3056
3057                 default:
3058                   goto invalid_code;
3059                 }
3060               continue;
3061
3062             case '%':
3063               ONE_MORE_BYTE (c1);
3064               if (c1 == '/')
3065                 {
3066                   /* CTEXT extended segment:
3067                      ESC % / [0-4] M L --ENCODING-NAME-- \002 --BYTES--
3068                      We keep these bytes as is for the moment.
3069                      They may be decoded by post-read-conversion.  */
3070                   int dim, M, L;
3071                   int size;
3072
3073                   ONE_MORE_BYTE (dim);
3074                   ONE_MORE_BYTE (M);
3075                   ONE_MORE_BYTE (L);
3076                   size = ((M - 128) * 128) + (L - 128);
3077                   if (charbuf + 8 + size > charbuf_end)
3078                     goto break_loop;
3079                   *charbuf++ = ISO_CODE_ESC;
3080                   *charbuf++ = '%';
3081                   *charbuf++ = '/';
3082                   *charbuf++ = dim;
3083                   *charbuf++ = BYTE8_TO_CHAR (M);
3084                   *charbuf++ = BYTE8_TO_CHAR (L);
3085                   while (size-- > 0)
3086                     {
3087                       ONE_MORE_BYTE (c1);
3088                       *charbuf++ = ASCII_BYTE_P (c1) ? c1 : BYTE8_TO_CHAR (c1);
3089                     }
3090                 }
3091               else if (c1 == 'G')
3092                 {
3093                   /* XFree86 extension for embedding UTF-8 in CTEXT:
3094                      ESC % G --UTF-8-BYTES-- ESC % @
3095                      We keep these bytes as is for the moment.
3096                      They may be decoded by post-read-conversion.  */
3097                   int *p = charbuf;
3098
3099                   if (p + 6 > charbuf_end)
3100                     goto break_loop;
3101                   *p++ = ISO_CODE_ESC;
3102                   *p++ = '%';
3103                   *p++ = 'G';
3104                   while (p < charbuf_end)
3105                     {
3106                       ONE_MORE_BYTE (c1);
3107                       if (c1 == ISO_CODE_ESC
3108                           && src + 1 < src_end
3109                           && src[0] == '%'
3110                           && src[1] == '@')
3111                         break;
3112                       *p++ = ASCII_BYTE_P (c1) ? c1 : BYTE8_TO_CHAR (c1);
3113                     }
3114                   if (p + 3 > charbuf_end)
3115                     goto break_loop;
3116                   *p++ = ISO_CODE_ESC;
3117                   *p++ = '%';
3118                   *p++ = '@';
3119                   charbuf = p;
3120                 }
3121               else
3122                 goto invalid_code;
3123               continue;
3124               break;
3125
3126             default:
3127               if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DESIGNATION))
3128                 goto invalid_code;
3129               if (c1 >= 0x28 && c1 <= 0x2B)
3130                 {       /* designation of DIMENSION1_CHARS94 character set */
3131                   ONE_MORE_BYTE (c2);
3132                   DECODE_DESIGNATION (c1 - 0x28, 1, 0, c2);
3133                 }
3134               else if (c1 >= 0x2C && c1 <= 0x2F)
3135                 {       /* designation of DIMENSION1_CHARS96 character set */
3136                   ONE_MORE_BYTE (c2);
3137                   DECODE_DESIGNATION (c1 - 0x2C, 1, 1, c2);
3138                 }
3139               else
3140                 goto invalid_code;
3141               /* We must update these variables now.  */
3142               charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
3143               charset_id_1 = CODING_ISO_INVOKED_CHARSET (coding, 1);
3144               continue;
3145             }
3146         }
3147
3148       if (charset->id != charset_ascii
3149           && last_id != charset->id)
3150         {
3151           if (last_id != charset_ascii)
3152             ADD_CHARSET_DATA (charbuf, last_offset, char_offset, last_id);
3153           last_id = charset->id;
3154           last_offset = char_offset;
3155         }
3156
3157       /* Now we know CHARSET and 1st position code C1 of a character.
3158          Produce a decoded character while getting 2nd position code
3159          C2 if necessary.  */
3160       c1 &= 0x7F;
3161       if (CHARSET_DIMENSION (charset) > 1)
3162         {
3163           ONE_MORE_BYTE (c2);
3164           if (c2 < 0x20 || (c2 >= 0x80 && c2 < 0xA0))
3165             /* C2 is not in a valid range.  */
3166             goto invalid_code;
3167           c1 = (c1 << 8) | (c2 & 0x7F);
3168           if (CHARSET_DIMENSION (charset) > 2)
3169             {
3170               ONE_MORE_BYTE (c2);
3171               if (c2 < 0x20 || (c2 >= 0x80 && c2 < 0xA0))
3172                 /* C2 is not in a valid range.  */
3173                 goto invalid_code;
3174               c1 = (c1 << 8) | (c2 & 0x7F);
3175             }
3176         }
3177
3178       CODING_DECODE_CHAR (coding, src, src_base, src_end, charset, c1, c);
3179       if (c < 0)
3180         {
3181           MAYBE_FINISH_COMPOSITION ();
3182           for (; src_base < src; src_base++, char_offset++)
3183             {
3184               if (ASCII_BYTE_P (*src_base))
3185                 *charbuf++ = *src_base;
3186               else
3187                 *charbuf++ = BYTE8_TO_CHAR (*src_base);
3188             }
3189         }
3190       else if (composition_state == COMPOSING_NO)
3191         {
3192           *charbuf++ = c;
3193           char_offset++;
3194         }
3195       else
3196         {
3197           components[component_idx++] = c;
3198           if (method == COMPOSITION_WITH_RULE
3199               || (method == COMPOSITION_WITH_RULE_ALTCHARS
3200                   && composition_state == COMPOSING_COMPONENT_CHAR))
3201             composition_state++;
3202         }
3203       continue;
3204
3205     invalid_code:
3206       MAYBE_FINISH_COMPOSITION ();
3207       src = src_base;
3208       consumed_chars = consumed_chars_base;
3209       ONE_MORE_BYTE (c);
3210       *charbuf++ = ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
3211       char_offset++;
3212       coding->errors++;
3213       continue;
3214
3215     break_loop:
3216       break;
3217     }
3218
3219  no_more_source:
3220   if (last_id != charset_ascii)
3221     ADD_CHARSET_DATA (charbuf, last_offset, char_offset, last_id);
3222   coding->consumed_char += consumed_chars_base;
3223   coding->consumed = src_base - coding->source;
3224   coding->charbuf_used = charbuf - coding->charbuf;
3225 }
3226
3227
3228 /* ISO2022 encoding stuff.  */
3229
3230 /*
3231    It is not enough to say just "ISO2022" on encoding, we have to
3232    specify more details.  In Emacs, each coding system of ISO2022
3233    variant has the following specifications:
3234         1. Initial designation to G0 thru G3.
3235         2. Allows short-form designation?
3236         3. ASCII should be designated to G0 before control characters?
3237         4. ASCII should be designated to G0 at end of line?
3238         5. 7-bit environment or 8-bit environment?
3239         6. Use locking-shift?
3240         7. Use Single-shift?
3241    And the following two are only for Japanese:
3242         8. Use ASCII in place of JIS0201-1976-Roman?
3243         9. Use JISX0208-1983 in place of JISX0208-1978?
3244    These specifications are encoded in CODING_ISO_FLAGS (coding) as flag bits
3245    defined by macros CODING_ISO_FLAG_XXX.  See `coding.h' for more
3246    details.
3247 */
3248
3249 /* Produce codes (escape sequence) for designating CHARSET to graphic
3250    register REG at DST, and increment DST.  If <final-char> of CHARSET is
3251    '@', 'A', or 'B' and the coding system CODING allows, produce
3252    designation sequence of short-form.  */
3253
3254 #define ENCODE_DESIGNATION(charset, reg, coding)                        \
3255   do {                                                                  \
3256     unsigned char final_char = CHARSET_ISO_FINAL (charset);             \
3257     char *intermediate_char_94 = "()*+";                                \
3258     char *intermediate_char_96 = ",-./";                                \
3259     int revision = -1;                                                  \
3260     int c;                                                              \
3261                                                                         \
3262     if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_REVISION)           \
3263       revision = CHARSET_ISO_REVISION (charset);                        \
3264                                                                         \
3265     if (revision >= 0)                                                  \
3266       {                                                                 \
3267         EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, '&');                       \
3268         EMIT_ONE_BYTE ('@' + revision);                                 \
3269       }                                                                 \
3270     EMIT_ONE_ASCII_BYTE (ISO_CODE_ESC);                                 \
3271     if (CHARSET_DIMENSION (charset) == 1)                               \
3272       {                                                                 \
3273         if (! CHARSET_ISO_CHARS_96 (charset))                           \
3274           c = intermediate_char_94[reg];                                \
3275         else                                                            \
3276           c = intermediate_char_96[reg];                                \
3277         EMIT_ONE_ASCII_BYTE (c);                                        \
3278       }                                                                 \
3279     else                                                                \
3280       {                                                                 \
3281         EMIT_ONE_ASCII_BYTE ('$');                                      \
3282         if (! CHARSET_ISO_CHARS_96 (charset))                           \
3283           {                                                             \
3284             if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LONG_FORM   \
3285                 || reg != 0                                             \
3286                 || final_char < '@' || final_char > 'B')                \
3287               EMIT_ONE_ASCII_BYTE (intermediate_char_94[reg]);          \
3288           }                                                             \
3289         else                                                            \
3290           EMIT_ONE_ASCII_BYTE (intermediate_char_96[reg]);              \
3291       }                                                                 \
3292     EMIT_ONE_ASCII_BYTE (final_char);                                   \
3293                                                                         \
3294     CODING_ISO_DESIGNATION (coding, reg) = CHARSET_ID (charset);        \
3295   } while (0)
3296
3297
3298 /* The following two macros produce codes (control character or escape
3299    sequence) for ISO2022 single-shift functions (single-shift-2 and
3300    single-shift-3).  */
3301
3302 #define ENCODE_SINGLE_SHIFT_2                                           \
3303   do {                                                                  \
3304     if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS)         \
3305       EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'N');                         \
3306     else                                                                \
3307       EMIT_ONE_BYTE (ISO_CODE_SS2);                                     \
3308     CODING_ISO_SINGLE_SHIFTING (coding) = 1;                            \
3309   } while (0)
3310
3311
3312 #define ENCODE_SINGLE_SHIFT_3                                           \
3313   do {                                                                  \
3314     if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS)         \
3315       EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'O');                         \
3316     else                                                                \
3317       EMIT_ONE_BYTE (ISO_CODE_SS3);                                     \
3318     CODING_ISO_SINGLE_SHIFTING (coding) = 1;                            \
3319   } while (0)
3320
3321
3322 /* The following four macros produce codes (control character or
3323    escape sequence) for ISO2022 locking-shift functions (shift-in,
3324    shift-out, locking-shift-2, and locking-shift-3).  */
3325
3326 #define ENCODE_SHIFT_IN                                 \
3327   do {                                                  \
3328     EMIT_ONE_ASCII_BYTE (ISO_CODE_SI);                  \
3329     CODING_ISO_INVOCATION (coding, 0) = 0;              \
3330   } while (0)
3331
3332
3333 #define ENCODE_SHIFT_OUT                                \
3334   do {                                                  \
3335     EMIT_ONE_ASCII_BYTE (ISO_CODE_SO);                  \
3336     CODING_ISO_INVOCATION (coding, 0) = 1;              \
3337   } while (0)
3338
3339
3340 #define ENCODE_LOCKING_SHIFT_2                          \
3341   do {                                                  \
3342     EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'n');           \
3343     CODING_ISO_INVOCATION (coding, 0) = 2;              \
3344   } while (0)
3345
3346
3347 #define ENCODE_LOCKING_SHIFT_3                          \
3348   do {                                                  \
3349     EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'n');           \
3350     CODING_ISO_INVOCATION (coding, 0) = 3;              \
3351   } while (0)
3352
3353
3354 /* Produce codes for a DIMENSION1 character whose character set is
3355    CHARSET and whose position-code is C1.  Designation and invocation
3356    sequences are also produced in advance if necessary.  */
3357
3358 #define ENCODE_ISO_CHARACTER_DIMENSION1(charset, c1)                    \
3359   do {                                                                  \
3360     int id = CHARSET_ID (charset);                                      \
3361                                                                         \
3362     if ((CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_USE_ROMAN)         \
3363         && id == charset_ascii)                                         \
3364       {                                                                 \
3365         id = charset_jisx0201_roman;                                    \
3366         charset = CHARSET_FROM_ID (id);                                 \
3367       }                                                                 \
3368                                                                         \
3369     if (CODING_ISO_SINGLE_SHIFTING (coding))                            \
3370       {                                                                 \
3371         if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS)     \
3372           EMIT_ONE_ASCII_BYTE (c1 & 0x7F);                              \
3373         else                                                            \
3374           EMIT_ONE_BYTE (c1 | 0x80);                                    \
3375         CODING_ISO_SINGLE_SHIFTING (coding) = 0;                        \
3376         break;                                                          \
3377       }                                                                 \
3378     else if (id == CODING_ISO_INVOKED_CHARSET (coding, 0))              \
3379       {                                                                 \
3380         EMIT_ONE_ASCII_BYTE (c1 & 0x7F);                                \
3381         break;                                                          \
3382       }                                                                 \
3383     else if (id == CODING_ISO_INVOKED_CHARSET (coding, 1))              \
3384       {                                                                 \
3385         EMIT_ONE_BYTE (c1 | 0x80);                                      \
3386         break;                                                          \
3387       }                                                                 \
3388     else                                                                \
3389       /* Since CHARSET is not yet invoked to any graphic planes, we     \
3390          must invoke it, or, at first, designate it to some graphic     \
3391          register.  Then repeat the loop to actually produce the        \
3392          character.  */                                                 \
3393       dst = encode_invocation_designation (charset, coding, dst,        \
3394                                            &produced_chars);            \
3395   } while (1)
3396
3397
3398 /* Produce codes for a DIMENSION2 character whose character set is
3399    CHARSET and whose position-codes are C1 and C2.  Designation and
3400    invocation codes are also produced in advance if necessary.  */
3401
3402 #define ENCODE_ISO_CHARACTER_DIMENSION2(charset, c1, c2)                \
3403   do {                                                                  \
3404     int id = CHARSET_ID (charset);                                      \
3405                                                                         \
3406     if ((CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_USE_OLDJIS)        \
3407         && id == charset_jisx0208)                                      \
3408       {                                                                 \
3409         id = charset_jisx0208_1978;                                     \
3410         charset = CHARSET_FROM_ID (id);                                 \
3411       }                                                                 \
3412                                                                         \
3413     if (CODING_ISO_SINGLE_SHIFTING (coding))                            \
3414       {                                                                 \
3415         if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS)     \
3416           EMIT_TWO_ASCII_BYTES ((c1) & 0x7F, (c2) & 0x7F);              \
3417         else                                                            \
3418           EMIT_TWO_BYTES ((c1) | 0x80, (c2) | 0x80);                    \
3419         CODING_ISO_SINGLE_SHIFTING (coding) = 0;                        \
3420         break;                                                          \
3421       }                                                                 \
3422     else if (id == CODING_ISO_INVOKED_CHARSET (coding, 0))              \
3423       {                                                                 \
3424         EMIT_TWO_ASCII_BYTES ((c1) & 0x7F, (c2) & 0x7F);                \
3425         break;                                                          \
3426       }                                                                 \
3427     else if (id == CODING_ISO_INVOKED_CHARSET (coding, 1))              \
3428       {                                                                 \
3429         EMIT_TWO_BYTES ((c1) | 0x80, (c2) | 0x80);                      \
3430         break;                                                          \
3431       }                                                                 \
3432     else                                                                \
3433       /* Since CHARSET is not yet invoked to any graphic planes, we     \
3434          must invoke it, or, at first, designate it to some graphic     \
3435          register.  Then repeat the loop to actually produce the        \
3436          character.  */                                                 \
3437       dst = encode_invocation_designation (charset, coding, dst,        \
3438                                            &produced_chars);            \
3439   } while (1)
3440
3441
3442 #define ENCODE_ISO_CHARACTER(charset, c)                                   \
3443   do {                                                                     \
3444     int code = ENCODE_CHAR ((charset),(c));                                \
3445                                                                            \
3446     if (CHARSET_DIMENSION (charset) == 1)                                  \
3447       ENCODE_ISO_CHARACTER_DIMENSION1 ((charset), code);                   \
3448     else                                                                   \
3449       ENCODE_ISO_CHARACTER_DIMENSION2 ((charset), code >> 8, code & 0xFF); \
3450   } while (0)
3451
3452
3453 /* Produce designation and invocation codes at a place pointed by DST
3454    to use CHARSET.  The element `spec.iso_2022' of *CODING is updated.
3455    Return new DST.  */
3456
3457 unsigned char *
3458 encode_invocation_designation (charset, coding, dst, p_nchars)
3459      struct charset *charset;
3460      struct coding_system *coding;
3461      unsigned char *dst;
3462      int *p_nchars;
3463 {
3464   int multibytep = coding->dst_multibyte;
3465   int produced_chars = *p_nchars;
3466   int reg;                      /* graphic register number */
3467   int id = CHARSET_ID (charset);
3468
3469   /* At first, check designations.  */
3470   for (reg = 0; reg < 4; reg++)
3471     if (id == CODING_ISO_DESIGNATION (coding, reg))
3472       break;
3473
3474   if (reg >= 4)
3475     {
3476       /* CHARSET is not yet designated to any graphic registers.  */
3477       /* At first check the requested designation.  */
3478       reg = CODING_ISO_REQUEST (coding, id);
3479       if (reg < 0)
3480         /* Since CHARSET requests no special designation, designate it
3481            to graphic register 0.  */
3482         reg = 0;
3483
3484       ENCODE_DESIGNATION (charset, reg, coding);
3485     }
3486
3487   if (CODING_ISO_INVOCATION (coding, 0) != reg
3488       && CODING_ISO_INVOCATION (coding, 1) != reg)
3489     {
3490       /* Since the graphic register REG is not invoked to any graphic
3491          planes, invoke it to graphic plane 0.  */
3492       switch (reg)
3493         {
3494         case 0:                 /* graphic register 0 */
3495           ENCODE_SHIFT_IN;
3496           break;
3497
3498         case 1:                 /* graphic register 1 */
3499           ENCODE_SHIFT_OUT;
3500           break;
3501
3502         case 2:                 /* graphic register 2 */
3503           if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT)
3504             ENCODE_SINGLE_SHIFT_2;
3505           else
3506             ENCODE_LOCKING_SHIFT_2;
3507           break;
3508
3509         case 3:                 /* graphic register 3 */
3510           if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT)
3511             ENCODE_SINGLE_SHIFT_3;
3512           else
3513             ENCODE_LOCKING_SHIFT_3;
3514           break;
3515         }
3516     }
3517
3518   *p_nchars = produced_chars;
3519   return dst;
3520 }
3521
3522 /* The following three macros produce codes for indicating direction
3523    of text.  */
3524 #define ENCODE_CONTROL_SEQUENCE_INTRODUCER                              \
3525   do {                                                                  \
3526     if (CODING_ISO_FLAGS (coding) == CODING_ISO_FLAG_SEVEN_BITS)        \
3527       EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, '[');                         \
3528     else                                                                \
3529       EMIT_ONE_BYTE (ISO_CODE_CSI);                                     \
3530   } while (0)
3531
3532
3533 #define ENCODE_DIRECTION_R2L()                  \
3534   do {                                          \
3535     ENCODE_CONTROL_SEQUENCE_INTRODUCER (dst);   \
3536     EMIT_TWO_ASCII_BYTES ('2', ']');            \
3537   } while (0)
3538
3539
3540 #define ENCODE_DIRECTION_L2R()                  \
3541   do {                                          \
3542     ENCODE_CONTROL_SEQUENCE_INTRODUCER (dst);   \
3543     EMIT_TWO_ASCII_BYTES ('0', ']');            \
3544   } while (0)
3545
3546
3547 /* Produce codes for designation and invocation to reset the graphic
3548    planes and registers to initial state.  */
3549 #define ENCODE_RESET_PLANE_AND_REGISTER()                               \
3550   do {                                                                  \
3551     int reg;                                                            \
3552     struct charset *charset;                                            \
3553                                                                         \
3554     if (CODING_ISO_INVOCATION (coding, 0) != 0)                         \
3555       ENCODE_SHIFT_IN;                                                  \
3556     for (reg = 0; reg < 4; reg++)                                       \
3557       if (CODING_ISO_INITIAL (coding, reg) >= 0                         \
3558           && (CODING_ISO_DESIGNATION (coding, reg)                      \
3559               != CODING_ISO_INITIAL (coding, reg)))                     \
3560         {                                                               \
3561           charset = CHARSET_FROM_ID (CODING_ISO_INITIAL (coding, reg)); \
3562           ENCODE_DESIGNATION (charset, reg, coding);                    \
3563         }                                                               \
3564   } while (0)
3565
3566
3567 /* Produce designation sequences of charsets in the line started from
3568    SRC to a place pointed by DST, and return updated DST.
3569
3570    If the current block ends before any end-of-line, we may fail to
3571    find all the necessary designations.  */
3572
3573 static unsigned char *
3574 encode_designation_at_bol (coding, charbuf, charbuf_end, dst)
3575      struct coding_system *coding;
3576      int *charbuf, *charbuf_end;
3577      unsigned char *dst;
3578 {
3579   struct charset *charset;
3580   /* Table of charsets to be designated to each graphic register.  */
3581   int r[4];
3582   int c, found = 0, reg;
3583   int produced_chars = 0;
3584   int multibytep = coding->dst_multibyte;
3585   Lisp_Object attrs;
3586   Lisp_Object charset_list;
3587
3588   attrs = CODING_ID_ATTRS (coding->id);
3589   charset_list = CODING_ATTR_CHARSET_LIST (attrs);
3590   if (EQ (charset_list, Qiso_2022))
3591     charset_list = Viso_2022_charset_list;
3592
3593   for (reg = 0; reg < 4; reg++)
3594     r[reg] = -1;
3595
3596   while (found < 4)
3597     {
3598       int id;
3599
3600       c = *charbuf++;
3601       if (c == '\n')
3602         break;
3603       charset = char_charset (c, charset_list, NULL);
3604       id = CHARSET_ID (charset);
3605       reg = CODING_ISO_REQUEST (coding, id);
3606       if (reg >= 0 && r[reg] < 0)
3607         {
3608           found++;
3609           r[reg] = id;
3610         }
3611     }
3612
3613   if (found)
3614     {
3615       for (reg = 0; reg < 4; reg++)
3616         if (r[reg] >= 0
3617             && CODING_ISO_DESIGNATION (coding, reg) != r[reg])
3618           ENCODE_DESIGNATION (CHARSET_FROM_ID (r[reg]), reg, coding);
3619     }
3620
3621   return dst;
3622 }
3623
3624 /* See the above "GENERAL NOTES on `encode_coding_XXX ()' functions".  */
3625
3626 static int
3627 encode_coding_iso_2022 (coding)
3628      struct coding_system *coding;
3629 {
3630   int multibytep = coding->dst_multibyte;
3631   int *charbuf = coding->charbuf;
3632   int *charbuf_end = charbuf + coding->charbuf_used;
3633   unsigned char *dst = coding->destination + coding->produced;
3634   unsigned char *dst_end = coding->destination + coding->dst_bytes;
3635   int safe_room = 16;
3636   int bol_designation
3637     = (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DESIGNATE_AT_BOL
3638        && CODING_ISO_BOL (coding));
3639   int produced_chars = 0;
3640   Lisp_Object attrs, eol_type, charset_list;
3641   int ascii_compatible;
3642   int c;
3643   int preferred_charset_id = -1;
3644
3645   CODING_GET_INFO (coding, attrs, eol_type, charset_list);
3646   setup_iso_safe_charsets (attrs);
3647   /* Charset list may have been changed.  */
3648   charset_list = CODING_ATTR_CHARSET_LIST (attrs);              \
3649   coding->safe_charsets = (char *) SDATA (CODING_ATTR_SAFE_CHARSETS(attrs));
3650
3651   ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
3652
3653   while (charbuf < charbuf_end)
3654     {
3655       ASSURE_DESTINATION (safe_room);
3656
3657       if (bol_designation)
3658         {
3659           unsigned char *dst_prev = dst;
3660
3661           /* We have to produce designation sequences if any now.  */
3662           dst = encode_designation_at_bol (coding, charbuf, charbuf_end, dst);
3663           bol_designation = 0;
3664           /* We are sure that designation sequences are all ASCII bytes.  */
3665           produced_chars += dst - dst_prev;
3666         }
3667
3668       c = *charbuf++;
3669
3670       if (c < 0)
3671         {
3672           /* Handle an annotation.  */
3673           switch (*charbuf)
3674             {
3675             case CODING_ANNOTATE_COMPOSITION_MASK:
3676               /* Not yet implemented.  */
3677               break;
3678             case CODING_ANNOTATE_CHARSET_MASK:
3679               preferred_charset_id = charbuf[3];
3680               if (preferred_charset_id >= 0
3681                   && NILP (Fmemq (make_number (preferred_charset_id),
3682                                   charset_list)))
3683                 preferred_charset_id = -1;
3684               break;
3685             default:
3686               abort ();
3687             }
3688           charbuf += -c - 1;
3689           continue;
3690         }
3691
3692       /* Now encode the character C.  */
3693       if (c < 0x20 || c == 0x7F)
3694         {
3695           if (c == '\n'
3696               || (c == '\r' && EQ (eol_type, Qmac)))
3697             {
3698               if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_RESET_AT_EOL)
3699                 ENCODE_RESET_PLANE_AND_REGISTER ();
3700               if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_INIT_AT_BOL)
3701                 {
3702                   int i;
3703
3704                   for (i = 0; i < 4; i++)
3705                     CODING_ISO_DESIGNATION (coding, i)
3706                       = CODING_ISO_INITIAL (coding, i);
3707                 }
3708               bol_designation
3709                 = CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DESIGNATE_AT_BOL;
3710             }
3711           else if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_RESET_AT_CNTL)
3712             ENCODE_RESET_PLANE_AND_REGISTER ();
3713           EMIT_ONE_ASCII_BYTE (c);
3714         }
3715       else if (ASCII_CHAR_P (c))
3716         {
3717           if (ascii_compatible)
3718             EMIT_ONE_ASCII_BYTE (c);
3719           else
3720             {
3721               struct charset *charset = CHARSET_FROM_ID (charset_ascii);
3722               ENCODE_ISO_CHARACTER (charset, c);
3723             }
3724         }
3725       else if (CHAR_BYTE8_P (c))
3726         {
3727           c = CHAR_TO_BYTE8 (c);
3728           EMIT_ONE_BYTE (c);
3729         }
3730       else
3731         {
3732           struct charset *charset;
3733
3734           if (preferred_charset_id >= 0)
3735             {
3736               charset = CHARSET_FROM_ID (preferred_charset_id);
3737               if (! CHAR_CHARSET_P (c, charset))
3738                 charset = char_charset (c, charset_list, NULL);
3739             }
3740           else
3741             charset = char_charset (c, charset_list, NULL);
3742           if (!charset)
3743             {
3744               if (coding->mode & CODING_MODE_SAFE_ENCODING)
3745                 {
3746                   c = CODING_INHIBIT_CHARACTER_SUBSTITUTION;
3747                   charset = CHARSET_FROM_ID (charset_ascii);
3748                 }
3749               else
3750                 {
3751                   c = coding->default_char;
3752                   charset = char_charset (c, charset_list, NULL);
3753                 }
3754             }
3755           ENCODE_ISO_CHARACTER (charset, c);
3756         }
3757     }
3758
3759   if (coding->mode & CODING_MODE_LAST_BLOCK
3760       && CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_RESET_AT_EOL)
3761     {
3762       ASSURE_DESTINATION (safe_room);
3763       ENCODE_RESET_PLANE_AND_REGISTER ();
3764     }
3765   coding->result = CODING_RESULT_SUCCESS;
3766   CODING_ISO_BOL (coding) = bol_designation;
3767   coding->produced_char += produced_chars;
3768   coding->produced = dst - coding->destination;
3769   return 0;
3770 }
3771
3772 \f
3773 /*** 8,9. SJIS and BIG5 handlers ***/
3774
3775 /* Although SJIS and BIG5 are not ISO's coding system, they are used
3776    quite widely.  So, for the moment, Emacs supports them in the bare
3777    C code.  But, in the future, they may be supported only by CCL.  */
3778
3779 /* SJIS is a coding system encoding three character sets: ASCII, right
3780    half of JISX0201-Kana, and JISX0208.  An ASCII character is encoded
3781    as is.  A character of charset katakana-jisx0201 is encoded by
3782    "position-code + 0x80".  A character of charset japanese-jisx0208
3783    is encoded in 2-byte but two position-codes are divided and shifted
3784    so that it fit in the range below.
3785
3786    --- CODE RANGE of SJIS ---
3787    (character set)      (range)
3788    ASCII                0x00 .. 0x7F
3789    KATAKANA-JISX0201    0xA0 .. 0xDF
3790    JISX0208 (1st byte)  0x81 .. 0x9F and 0xE0 .. 0xEF
3791             (2nd byte)  0x40 .. 0x7E and 0x80 .. 0xFC
3792    -------------------------------
3793
3794 */
3795
3796 /* BIG5 is a coding system encoding two character sets: ASCII and
3797    Big5.  An ASCII character is encoded as is.  Big5 is a two-byte
3798    character set and is encoded in two-byte.
3799
3800    --- CODE RANGE of BIG5 ---
3801    (character set)      (range)
3802    ASCII                0x00 .. 0x7F
3803    Big5 (1st byte)      0xA1 .. 0xFE
3804         (2nd byte)      0x40 .. 0x7E and 0xA1 .. 0xFE
3805    --------------------------
3806
3807   */
3808
3809 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
3810    Check if a text is encoded in SJIS.  If it is, return
3811    CATEGORY_MASK_SJIS, else return 0.  */
3812
3813 static int
3814 detect_coding_sjis (coding, detect_info)
3815      struct coding_system *coding;
3816      struct coding_detection_info *detect_info;
3817 {
3818   const unsigned char *src = coding->source, *src_base = src;
3819   const unsigned char *src_end = coding->source + coding->src_bytes;
3820   int multibytep = coding->src_multibyte;
3821   int consumed_chars = 0;
3822   int found = 0;
3823   int c;
3824   int incomplete;
3825
3826   detect_info->checked |= CATEGORY_MASK_SJIS;
3827   /* A coding system of this category is always ASCII compatible.  */
3828   src += coding->head_ascii;
3829
3830   while (1)
3831     {
3832       incomplete = 0;
3833       ONE_MORE_BYTE (c);
3834       incomplete = 1;
3835       if (c < 0x80)
3836         continue;
3837       if ((c >= 0x81 && c <= 0x9F) || (c >= 0xE0 && c <= 0xEF))
3838         {
3839           ONE_MORE_BYTE (c);
3840           if (c < 0x40 || c == 0x7F || c > 0xFC)
3841             break;
3842           found = CATEGORY_MASK_SJIS;
3843         }
3844       else if (c >= 0xA0 && c < 0xE0)
3845         found = CATEGORY_MASK_SJIS;
3846       else
3847         break;
3848     }
3849   detect_info->rejected |= CATEGORY_MASK_SJIS;
3850   return 0;
3851
3852  no_more_source:
3853   if (incomplete && coding->mode & CODING_MODE_LAST_BLOCK)
3854     {
3855       detect_info->rejected |= CATEGORY_MASK_SJIS;
3856       return 0;
3857     }
3858   detect_info->found |= found;
3859   return 1;
3860 }
3861
3862 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
3863    Check if a text is encoded in BIG5.  If it is, return
3864    CATEGORY_MASK_BIG5, else return 0.  */
3865
3866 static int
3867 detect_coding_big5 (coding, detect_info)
3868      struct coding_system *coding;
3869      struct coding_detection_info *detect_info;
3870 {
3871   const unsigned char *src = coding->source, *src_base = src;
3872   const unsigned char *src_end = coding->source + coding->src_bytes;
3873   int multibytep = coding->src_multibyte;
3874   int consumed_chars = 0;
3875   int found = 0;
3876   int c;
3877   int incomplete;
3878
3879   detect_info->checked |= CATEGORY_MASK_BIG5;
3880   /* A coding system of this category is always ASCII compatible.  */
3881   src += coding->head_ascii;
3882
3883   while (1)
3884     {
3885       incomplete = 0;
3886       ONE_MORE_BYTE (c);
3887       incomplete = 1;
3888       if (c < 0x80)
3889         continue;
3890       if (c >= 0xA1)
3891         {
3892           ONE_MORE_BYTE (c);
3893           if (c < 0x40 || (c >= 0x7F && c <= 0xA0))
3894             return 0;
3895           found = CATEGORY_MASK_BIG5;
3896         }
3897       else
3898         break;
3899     }
3900   detect_info->rejected |= CATEGORY_MASK_BIG5;
3901   return 0;
3902
3903  no_more_source:
3904   if (incomplete && coding->mode & CODING_MODE_LAST_BLOCK)
3905     {
3906       detect_info->rejected |= CATEGORY_MASK_BIG5;
3907       return 0;
3908     }
3909   detect_info->found |= found;
3910   return 1;
3911 }
3912
3913 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".
3914    If SJIS_P is 1, decode SJIS text, else decode BIG5 test.  */
3915
3916 static void
3917 decode_coding_sjis (coding)
3918      struct coding_system *coding;
3919 {
3920   const unsigned char *src = coding->source + coding->consumed;
3921   const unsigned char *src_end = coding->source + coding->src_bytes;
3922   const unsigned char *src_base;
3923   int *charbuf = coding->charbuf;
3924   int *charbuf_end = charbuf + coding->charbuf_size - MAX_ANNOTATION_LENGTH;
3925   int consumed_chars = 0, consumed_chars_base;
3926   int multibytep = coding->src_multibyte;
3927   struct charset *charset_roman, *charset_kanji, *charset_kana;
3928   Lisp_Object attrs, eol_type, charset_list, val;
3929   int char_offset = coding->produced_char;
3930   int last_offset = char_offset;
3931   int last_id = charset_ascii;
3932
3933   CODING_GET_INFO (coding, attrs, eol_type, charset_list);
3934
3935   val = charset_list;
3936   charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
3937   charset_kana = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
3938   charset_kanji = CHARSET_FROM_ID (XINT (XCAR (val)));
3939
3940   while (1)
3941     {
3942       int c, c1;
3943
3944       src_base = src;
3945       consumed_chars_base = consumed_chars;
3946
3947       if (charbuf >= charbuf_end)
3948         break;
3949
3950       ONE_MORE_BYTE (c);
3951
3952       if (c == '\r')
3953         {
3954           if (EQ (eol_type, Qdos))
3955             {
3956               if (src == src_end)
3957                 {
3958                   coding->result = CODING_RESULT_INSUFFICIENT_SRC;
3959                   goto no_more_source;
3960                 }
3961               if (*src == '\n')
3962                 ONE_MORE_BYTE (c);
3963             }
3964           else if (EQ (eol_type, Qmac))
3965             c = '\n';
3966         }
3967       else
3968         {
3969           struct charset *charset;
3970
3971           if (c < 0x80)
3972             charset = charset_roman;
3973           else
3974             {
3975               if (c >= 0xF0)
3976                 goto invalid_code;
3977               if (c < 0xA0 || c >= 0xE0)
3978                 {
3979                   /* SJIS -> JISX0208 */
3980                   ONE_MORE_BYTE (c1);
3981                   if (c1 < 0x40 || c1 == 0x7F || c1 > 0xFC)
3982                     goto invalid_code;
3983                   c = (c << 8) | c1;
3984                   SJIS_TO_JIS (c);
3985                   charset = charset_kanji;
3986                 }
3987               else if (c > 0xA0)
3988                 {
3989                   /* SJIS -> JISX0201-Kana */
3990                   c &= 0x7F;
3991                   charset = charset_kana;
3992                 }
3993               else
3994                 goto invalid_code;
3995             }
3996           if (charset->id != charset_ascii
3997               && last_id != charset->id)
3998             {
3999               if (last_id != charset_ascii)
4000                 ADD_CHARSET_DATA (charbuf, last_offset, char_offset, last_id);
4001               last_id = charset->id;
4002               last_offset = char_offset;
4003             }
4004           CODING_DECODE_CHAR (coding, src, src_base, src_end, charset, c, c);
4005         }
4006       *charbuf++ = c;
4007       char_offset++;
4008       continue;
4009
4010     invalid_code:
4011       src = src_base;
4012       consumed_chars = consumed_chars_base;
4013       ONE_MORE_BYTE (c);
4014       *charbuf++ = ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
4015       char_offset++;
4016       coding->errors++;
4017     }
4018
4019  no_more_source:
4020   if (last_id != charset_ascii)
4021     ADD_CHARSET_DATA (charbuf, last_offset, char_offset, last_id);
4022   coding->consumed_char += consumed_chars_base;
4023   coding->consumed = src_base - coding->source;
4024   coding->charbuf_used = charbuf - coding->charbuf;
4025 }
4026
4027 static void
4028 decode_coding_big5 (coding)
4029      struct coding_system *coding;
4030 {
4031   const unsigned char *src = coding->source + coding->consumed;
4032   const unsigned char *src_end = coding->source + coding->src_bytes;
4033   const unsigned char *src_base;
4034   int *charbuf = coding->charbuf;
4035   int *charbuf_end = charbuf + coding->charbuf_size - MAX_ANNOTATION_LENGTH;
4036   int consumed_chars = 0, consumed_chars_base;
4037   int multibytep = coding->src_multibyte;
4038   struct charset *charset_roman, *charset_big5;
4039   Lisp_Object attrs, eol_type, charset_list, val;
4040   int char_offset = coding->produced_char;
4041   int last_offset = char_offset;
4042   int last_id = charset_ascii;
4043
4044   CODING_GET_INFO (coding, attrs, eol_type, charset_list);
4045   val = charset_list;
4046   charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
4047   charset_big5 = CHARSET_FROM_ID (XINT (XCAR (val)));
4048
4049   while (1)
4050     {
4051       int c, c1;
4052
4053       src_base = src;
4054       consumed_chars_base = consumed_chars;
4055
4056       if (charbuf >= charbuf_end)
4057         break;
4058
4059       ONE_MORE_BYTE (c);
4060
4061       if (c == '\r')
4062         {
4063           if (EQ (eol_type, Qdos))
4064             {
4065               if (src == src_end)
4066                 {
4067                   coding->result = CODING_RESULT_INSUFFICIENT_SRC;
4068                   goto no_more_source;
4069                 }
4070               if (*src == '\n')
4071                 ONE_MORE_BYTE (c);
4072             }
4073           else if (EQ (eol_type, Qmac))
4074             c = '\n';
4075         }
4076       else
4077         {
4078           struct charset *charset;
4079           if (c < 0x80)
4080             charset = charset_roman;
4081           else
4082             {
4083               /* BIG5 -> Big5 */
4084               if (c < 0xA1 || c > 0xFE)
4085                 goto invalid_code;
4086               ONE_MORE_BYTE (c1);
4087               if (c1 < 0x40 || (c1 > 0x7E && c1 < 0xA1) || c1 > 0xFE)
4088                 goto invalid_code;
4089               c = c << 8 | c1;
4090               charset = charset_big5;
4091             }
4092           if (charset->id != charset_ascii
4093               && last_id != charset->id)
4094             {
4095               if (last_id != charset_ascii)
4096                 ADD_CHARSET_DATA (charbuf, last_offset, char_offset, last_id);
4097               last_id = charset->id;
4098               last_offset = char_offset;
4099             }
4100           CODING_DECODE_CHAR (coding, src, src_base, src_end, charset, c, c);
4101         }
4102
4103       *charbuf++ = c;
4104       char_offset++;
4105       continue;
4106
4107     invalid_code:
4108       src = src_base;
4109       consumed_chars = consumed_chars_base;
4110       ONE_MORE_BYTE (c);
4111       *charbuf++ = ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
4112       char_offset++;
4113       coding->errors++;
4114     }
4115
4116  no_more_source:
4117   if (last_id != charset_ascii)
4118     ADD_CHARSET_DATA (charbuf, last_offset, char_offset, last_id);
4119   coding->consumed_char += consumed_chars_base;
4120   coding->consumed = src_base - coding->source;
4121   coding->charbuf_used = charbuf - coding->charbuf;
4122 }
4123
4124 /* See the above "GENERAL NOTES on `encode_coding_XXX ()' functions".
4125    This function can encode charsets `ascii', `katakana-jisx0201',
4126    `japanese-jisx0208', `chinese-big5-1', and `chinese-big5-2'.  We
4127    are sure that all these charsets are registered as official charset
4128    (i.e. do not have extended leading-codes).  Characters of other
4129    charsets are produced without any encoding.  If SJIS_P is 1, encode
4130    SJIS text, else encode BIG5 text.  */
4131
4132 static int
4133 encode_coding_sjis (coding)
4134      struct coding_system *coding;
4135 {
4136   int multibytep = coding->dst_multibyte;
4137   int *charbuf = coding->charbuf;
4138   int *charbuf_end = charbuf + coding->charbuf_used;
4139   unsigned char *dst = coding->destination + coding->produced;
4140   unsigned char *dst_end = coding->destination + coding->dst_bytes;
4141   int safe_room = 4;
4142   int produced_chars = 0;
4143   Lisp_Object attrs, eol_type, charset_list, val;
4144   int ascii_compatible;
4145   struct charset *charset_roman, *charset_kanji, *charset_kana;
4146   int c;
4147
4148   CODING_GET_INFO (coding, attrs, eol_type, charset_list);
4149   val = charset_list;
4150   charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
4151   charset_kana = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
4152   charset_kanji = CHARSET_FROM_ID (XINT (XCAR (val)));
4153
4154   ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
4155
4156   while (charbuf < charbuf_end)
4157     {
4158       ASSURE_DESTINATION (safe_room);
4159       c = *charbuf++;
4160       /* Now encode the character C.  */
4161       if (ASCII_CHAR_P (c) && ascii_compatible)
4162         EMIT_ONE_ASCII_BYTE (c);
4163       else if (CHAR_BYTE8_P (c))
4164         {
4165           c = CHAR_TO_BYTE8 (c);
4166           EMIT_ONE_BYTE (c);
4167         }
4168       else
4169         {
4170           unsigned code;
4171           struct charset *charset = char_charset (c, charset_list, &code);
4172
4173           if (!charset)
4174             {
4175               if (coding->mode & CODING_MODE_SAFE_ENCODING)
4176                 {
4177                   code = CODING_INHIBIT_CHARACTER_SUBSTITUTION;
4178                   charset = CHARSET_FROM_ID (charset_ascii);
4179                 }
4180               else
4181                 {
4182                   c = coding->default_char;
4183                   charset = char_charset (c, charset_list, &code);
4184                 }
4185             }
4186           if (code == CHARSET_INVALID_CODE (charset))
4187             abort ();
4188           if (charset == charset_kanji)
4189             {
4190               int c1, c2;
4191               JIS_TO_SJIS (code);
4192               c1 = code >> 8, c2 = code & 0xFF;
4193               EMIT_TWO_BYTES (c1, c2);
4194             }
4195           else if (charset == charset_kana)
4196             EMIT_ONE_BYTE (code | 0x80);
4197           else
4198             EMIT_ONE_ASCII_BYTE (code & 0x7F);
4199         }
4200     }
4201   coding->result = CODING_RESULT_SUCCESS;
4202   coding->produced_char += produced_chars;
4203   coding->produced = dst - coding->destination;
4204   return 0;
4205 }
4206
4207 static int
4208 encode_coding_big5 (coding)
4209      struct coding_system *coding;
4210 {
4211   int multibytep = coding->dst_multibyte;
4212   int *charbuf = coding->charbuf;
4213   int *charbuf_end = charbuf + coding->charbuf_used;
4214   unsigned char *dst = coding->destination + coding->produced;
4215   unsigned char *dst_end = coding->destination + coding->dst_bytes;
4216   int safe_room = 4;
4217   int produced_chars = 0;
4218   Lisp_Object attrs, eol_type, charset_list, val;
4219   int ascii_compatible;
4220   struct charset *charset_roman, *charset_big5;
4221   int c;
4222
4223   CODING_GET_INFO (coding, attrs, eol_type, charset_list);
4224   val = charset_list;
4225   charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
4226   charset_big5 = CHARSET_FROM_ID (XINT (XCAR (val)));
4227   ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
4228
4229   while (charbuf < charbuf_end)
4230     {
4231       ASSURE_DESTINATION (safe_room);
4232       c = *charbuf++;
4233       /* Now encode the character C.  */
4234       if (ASCII_CHAR_P (c) && ascii_compatible)
4235         EMIT_ONE_ASCII_BYTE (c);
4236       else if (CHAR_BYTE8_P (c))
4237         {
4238           c = CHAR_TO_BYTE8 (c);
4239           EMIT_ONE_BYTE (c);
4240         }
4241       else
4242         {
4243           unsigned code;
4244           struct charset *charset = char_charset (c, charset_list, &code);
4245
4246           if (! charset)
4247             {
4248               if (coding->mode & CODING_MODE_SAFE_ENCODING)
4249                 {
4250                   code = CODING_INHIBIT_CHARACTER_SUBSTITUTION;
4251                   charset = CHARSET_FROM_ID (charset_ascii);
4252                 }
4253               else
4254                 {
4255                   c = coding->default_char;
4256                   charset = char_charset (c, charset_list, &code);
4257                 }
4258             }
4259           if (code == CHARSET_INVALID_CODE (charset))
4260             abort ();
4261           if (charset == charset_big5)
4262             {
4263               int c1, c2;
4264
4265               c1 = code >> 8, c2 = code & 0xFF;
4266               EMIT_TWO_BYTES (c1, c2);
4267             }
4268           else
4269             EMIT_ONE_ASCII_BYTE (code & 0x7F);
4270         }
4271     }
4272   coding->result = CODING_RESULT_SUCCESS;
4273   coding->produced_char += produced_chars;
4274   coding->produced = dst - coding->destination;
4275   return 0;
4276 }
4277
4278 \f
4279 /*** 10. CCL handlers ***/
4280
4281 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
4282    Check if a text is encoded in a coding system of which
4283    encoder/decoder are written in CCL program.  If it is, return
4284    CATEGORY_MASK_CCL, else return 0.  */
4285
4286 static int
4287 detect_coding_ccl (coding, detect_info)
4288      struct coding_system *coding;
4289      struct coding_detection_info *detect_info;
4290 {
4291   const unsigned char *src = coding->source, *src_base = src;
4292   const unsigned char *src_end = coding->source + coding->src_bytes;
4293   int multibytep = coding->src_multibyte;
4294   int consumed_chars = 0;
4295   int found = 0;
4296   unsigned char *valids = CODING_CCL_VALIDS (coding);
4297   int head_ascii = coding->head_ascii;
4298   Lisp_Object attrs;
4299
4300   detect_info->checked |= CATEGORY_MASK_CCL;
4301
4302   coding = &coding_categories[coding_category_ccl];
4303   attrs = CODING_ID_ATTRS (coding->id);
4304   if (! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
4305     src += head_ascii;
4306
4307   while (1)
4308     {
4309       int c;
4310       ONE_MORE_BYTE (c);
4311       if (! valids[c])
4312         break;
4313       if ((valids[c] > 1))
4314         found = CATEGORY_MASK_CCL;
4315     }
4316   detect_info->rejected |= CATEGORY_MASK_CCL;
4317   return 0;
4318
4319  no_more_source:
4320   detect_info->found |= found;
4321   return 1;
4322 }
4323
4324 static void
4325 decode_coding_ccl (coding)
4326      struct coding_system *coding;
4327 {
4328   const unsigned char *src = coding->source + coding->consumed;
4329   const unsigned char *src_end = coding->source + coding->src_bytes;
4330   int *charbuf = coding->charbuf;
4331   int *charbuf_end = charbuf + coding->charbuf_size;
4332   int consumed_chars = 0;
4333   int multibytep = coding->src_multibyte;
4334   struct ccl_program ccl;
4335   int source_charbuf[1024];
4336   int source_byteidx[1024];
4337   Lisp_Object attrs, eol_type, charset_list;
4338
4339   CODING_GET_INFO (coding, attrs, eol_type, charset_list);
4340   setup_ccl_program (&ccl, CODING_CCL_DECODER (coding));
4341
4342   while (src < src_end)
4343     {
4344       const unsigned char *p = src;
4345       int *source, *source_end;
4346       int i = 0;
4347
4348       if (multibytep)
4349         while (i < 1024 && p < src_end)
4350           {
4351             source_byteidx[i] = p - src;
4352             source_charbuf[i++] = STRING_CHAR_ADVANCE (p);
4353           }
4354       else
4355         while (i < 1024 && p < src_end)
4356           source_charbuf[i++] = *p++;
4357
4358       if (p == src_end && coding->mode & CODING_MODE_LAST_BLOCK)
4359         ccl.last_block = 1;
4360
4361       source = source_charbuf;
4362       source_end = source + i;
4363       while (source < source_end)
4364         {
4365           ccl_driver (&ccl, source, charbuf,
4366                       source_end - source, charbuf_end - charbuf,
4367                       charset_list);
4368           source += ccl.consumed;
4369           charbuf += ccl.produced;
4370           if (ccl.status != CCL_STAT_SUSPEND_BY_DST)
4371             break;
4372         }
4373       if (source < source_end)
4374         src += source_byteidx[source - source_charbuf];
4375       else
4376         src = p;
4377       consumed_chars += source - source_charbuf;
4378
4379       if (ccl.status != CCL_STAT_SUSPEND_BY_SRC
4380           && ccl.status != CODING_RESULT_INSUFFICIENT_SRC)
4381         break;
4382     }
4383
4384   switch (ccl.status)
4385     {
4386     case CCL_STAT_SUSPEND_BY_SRC:
4387       coding->result = CODING_RESULT_INSUFFICIENT_SRC;
4388       break;
4389     case CCL_STAT_SUSPEND_BY_DST:
4390       break;
4391     case CCL_STAT_QUIT:
4392     case CCL_STAT_INVALID_CMD:
4393       coding->result = CODING_RESULT_INTERRUPT;
4394       break;
4395     default:
4396       coding->result = CODING_RESULT_SUCCESS;
4397       break;
4398     }
4399   coding->consumed_char += consumed_chars;
4400   coding->consumed = src - coding->source;
4401   coding->charbuf_used = charbuf - coding->charbuf;
4402 }
4403
4404 static int
4405 encode_coding_ccl (coding)
4406      struct coding_system *coding;
4407 {
4408   struct ccl_program ccl;
4409   int multibytep = coding->dst_multibyte;
4410   int *charbuf = coding->charbuf;
4411   int *charbuf_end = charbuf + coding->charbuf_used;
4412   unsigned char *dst = coding->destination + coding->produced;
4413   unsigned char *dst_end = coding->destination + coding->dst_bytes;
4414   unsigned char *adjusted_dst_end = dst_end - 1;
4415   int destination_charbuf[1024];
4416   int i, produced_chars = 0;
4417   Lisp_Object attrs, eol_type, charset_list;
4418
4419   CODING_GET_INFO (coding, attrs, eol_type, charset_list);
4420   setup_ccl_program (&ccl, CODING_CCL_ENCODER (coding));
4421
4422   ccl.last_block = coding->mode & CODING_MODE_LAST_BLOCK;
4423   ccl.dst_multibyte = coding->dst_multibyte;
4424
4425   while (charbuf < charbuf_end && dst < adjusted_dst_end)
4426     {
4427       int dst_bytes = dst_end - dst;
4428       if (dst_bytes > 1024)
4429         dst_bytes = 1024;
4430
4431       ccl_driver (&ccl, charbuf, destination_charbuf,
4432                   charbuf_end - charbuf, dst_bytes, charset_list);
4433       charbuf += ccl.consumed;
4434       if (multibytep)
4435         for (i = 0; i < ccl.produced; i++)
4436           EMIT_ONE_BYTE (destination_charbuf[i] & 0xFF);
4437       else
4438         {
4439           for (i = 0; i < ccl.produced; i++)
4440             *dst++ = destination_charbuf[i] & 0xFF;
4441           produced_chars += ccl.produced;
4442         }
4443     }
4444
4445   switch (ccl.status)
4446     {
4447     case CCL_STAT_SUSPEND_BY_SRC:
4448       coding->result = CODING_RESULT_INSUFFICIENT_SRC;
4449       break;
4450     case CCL_STAT_SUSPEND_BY_DST:
4451       coding->result = CODING_RESULT_INSUFFICIENT_DST;
4452       break;
4453     case CCL_STAT_QUIT:
4454     case CCL_STAT_INVALID_CMD:
4455       coding->result = CODING_RESULT_INTERRUPT;
4456       break;
4457     default:
4458       coding->result = CODING_RESULT_SUCCESS;
4459       break;
4460     }
4461
4462   coding->produced_char += produced_chars;
4463   coding->produced = dst - coding->destination;
4464   return 0;
4465 }
4466
4467
4468 \f
4469 /*** 10, 11. no-conversion handlers ***/
4470
4471 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".  */
4472
4473 static void
4474 decode_coding_raw_text (coding)
4475      struct coding_system *coding;
4476 {
4477   coding->chars_at_source = 1;
4478   coding->consumed_char = 0;
4479   coding->consumed = 0;
4480   coding->result = CODING_RESULT_SUCCESS;
4481 }
4482
4483 static int
4484 encode_coding_raw_text (coding)
4485      struct coding_system *coding;
4486 {
4487   int multibytep = coding->dst_multibyte;
4488   int *charbuf = coding->charbuf;
4489   int *charbuf_end = coding->charbuf + coding->charbuf_used;
4490   unsigned char *dst = coding->destination + coding->produced;
4491   unsigned char *dst_end = coding->destination + coding->dst_bytes;
4492   int produced_chars = 0;
4493   int c;
4494
4495   if (multibytep)
4496     {
4497       int safe_room = MAX_MULTIBYTE_LENGTH * 2;
4498
4499       if (coding->src_multibyte)
4500         while (charbuf < charbuf_end)
4501           {
4502             ASSURE_DESTINATION (safe_room);
4503             c = *charbuf++;
4504             if (ASCII_CHAR_P (c))
4505               EMIT_ONE_ASCII_BYTE (c);
4506             else if (CHAR_BYTE8_P (c))
4507               {
4508                 c = CHAR_TO_BYTE8 (c);
4509                 EMIT_ONE_BYTE (c);
4510               }
4511             else
4512               {
4513                 unsigned char str[MAX_MULTIBYTE_LENGTH], *p0 = str, *p1 = str;
4514
4515                 CHAR_STRING_ADVANCE (c, p1);
4516                 while (p0 < p1)
4517                   {
4518                     EMIT_ONE_BYTE (*p0);
4519                     p0++;
4520                   }
4521               }
4522           }
4523       else
4524         while (charbuf < charbuf_end)
4525           {
4526             ASSURE_DESTINATION (safe_room);
4527             c = *charbuf++;
4528             EMIT_ONE_BYTE (c);
4529           }
4530     }
4531   else
4532     {
4533       if (coding->src_multibyte)
4534         {
4535           int safe_room = MAX_MULTIBYTE_LENGTH;
4536
4537           while (charbuf < charbuf_end)
4538             {
4539               ASSURE_DESTINATION (safe_room);
4540               c = *charbuf++;
4541               if (ASCII_CHAR_P (c))
4542                 *dst++ = c;
4543               else if (CHAR_BYTE8_P (c))
4544                 *dst++ = CHAR_TO_BYTE8 (c);
4545               else
4546                 CHAR_STRING_ADVANCE (c, dst);
4547               produced_chars++;
4548             }
4549         }
4550       else
4551         {
4552           ASSURE_DESTINATION (charbuf_end - charbuf);
4553           while (charbuf < charbuf_end && dst < dst_end)
4554             *dst++ = *charbuf++;
4555           produced_chars = dst - (coding->destination + coding->dst_bytes);
4556         }
4557     }
4558   coding->result = CODING_RESULT_SUCCESS;
4559   coding->produced_char += produced_chars;
4560   coding->produced = dst - coding->destination;
4561   return 0;
4562 }
4563
4564 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
4565    Check if a text is encoded in a charset-based coding system.  If it
4566    is, return 1, else return 0.  */
4567
4568 static int
4569 detect_coding_charset (coding, detect_info)
4570      struct coding_system *coding;
4571      struct coding_detection_info *detect_info;
4572 {
4573   const unsigned char *src = coding->source, *src_base = src;
4574   const unsigned char *src_end = coding->source + coding->src_bytes;
4575   int multibytep = coding->src_multibyte;
4576   int consumed_chars = 0;
4577   Lisp_Object attrs, valids;
4578   int found = 0;
4579
4580   detect_info->checked |= CATEGORY_MASK_CHARSET;
4581
4582   coding = &coding_categories[coding_category_charset];
4583   attrs = CODING_ID_ATTRS (coding->id);
4584   valids = AREF (attrs, coding_attr_charset_valids);
4585
4586   if (! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
4587     src += coding->head_ascii;
4588
4589   while (1)
4590     {
4591       int c;
4592
4593       ONE_MORE_BYTE (c);
4594       if (NILP (AREF (valids, c)))
4595         break;
4596       if (c >= 0x80)
4597         found = CATEGORY_MASK_CHARSET;
4598     }
4599   detect_info->rejected |= CATEGORY_MASK_CHARSET;
4600   return 0;
4601
4602  no_more_source:
4603   detect_info->found |= found;
4604   return 1;
4605 }
4606
4607 static void
4608 decode_coding_charset (coding)
4609      struct coding_system *coding;
4610 {
4611   const unsigned char *src = coding->source + coding->consumed;
4612   const unsigned char *src_end = coding->source + coding->src_bytes;
4613   const unsigned char *src_base;
4614   int *charbuf = coding->charbuf;
4615   int *charbuf_end = charbuf + coding->charbuf_size - MAX_ANNOTATION_LENGTH;
4616   int consumed_chars = 0, consumed_chars_base;
4617   int multibytep = coding->src_multibyte;
4618   Lisp_Object attrs, eol_type, charset_list, valids;
4619   int char_offset = coding->produced_char;
4620   int last_offset = char_offset;
4621   int last_id = charset_ascii;
4622
4623   CODING_GET_INFO (coding, attrs, eol_type, charset_list);
4624   valids = AREF (attrs, coding_attr_charset_valids);
4625
4626   while (1)
4627     {
4628       int c;
4629
4630       src_base = src;
4631       consumed_chars_base = consumed_chars;
4632
4633       if (charbuf >= charbuf_end)
4634         break;
4635
4636       ONE_MORE_BYTE (c);
4637       if (c == '\r')
4638         {
4639           /* Here we assume that no charset maps '\r' to something
4640              else.  */
4641           if (EQ (eol_type, Qdos))
4642             {
4643               if (src == src_end)
4644                 {
4645                   coding->result = CODING_RESULT_INSUFFICIENT_SRC;
4646                   goto no_more_source;
4647                 }
4648               if (*src == '\n')
4649                 ONE_MORE_BYTE (c);
4650             }
4651           else if (EQ (eol_type, Qmac))
4652             c = '\n';
4653         }
4654       else
4655         {
4656           Lisp_Object val;
4657           struct charset *charset;
4658           int dim;
4659           int len = 1;
4660           unsigned code = c;
4661
4662           val = AREF (valids, c);
4663           if (NILP (val))
4664             goto invalid_code;
4665           if (INTEGERP (val))
4666             {
4667               charset = CHARSET_FROM_ID (XFASTINT (val));
4668               dim = CHARSET_DIMENSION (charset);
4669               while (len < dim)
4670                 {
4671                   ONE_MORE_BYTE (c);
4672                   code = (code << 8) | c;
4673                   len++;
4674                 }
4675               CODING_DECODE_CHAR (coding, src, src_base, src_end,
4676                                   charset, code, c);
4677             }
4678           else
4679             {
4680               /* VAL is a list of charset IDs.  It is assured that the
4681                  list is sorted by charset dimensions (smaller one
4682                  comes first).  */
4683               while (CONSP (val))
4684                 {
4685                   charset = CHARSET_FROM_ID (XFASTINT (XCAR (val)));
4686                   dim = CHARSET_DIMENSION (charset);
4687                   while (len < dim)
4688                     {
4689                       ONE_MORE_BYTE (c);
4690                       code = (code << 8) | c;
4691                       len++;
4692                     }
4693                   CODING_DECODE_CHAR (coding, src, src_base,
4694                                       src_end, charset, code, c);
4695                   if (c >= 0)
4696                     break;
4697                   val = XCDR (val);
4698                 }
4699             }
4700           if (c < 0)
4701             goto invalid_code;
4702           if (charset->id != charset_ascii
4703               && last_id != charset->id)
4704             {
4705               if (last_id != charset_ascii)
4706                 ADD_CHARSET_DATA (charbuf, last_offset, char_offset, last_id);
4707               last_id = charset->id;
4708               last_offset = char_offset;
4709             }
4710         }
4711       *charbuf++ = c;
4712       char_offset++;
4713       continue;
4714
4715     invalid_code:
4716       src = src_base;
4717       consumed_chars = consumed_chars_base;
4718       ONE_MORE_BYTE (c);
4719       *charbuf++ = ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
4720       char_offset++;
4721       coding->errors++;
4722     }
4723
4724  no_more_source:
4725   if (last_id != charset_ascii)
4726     ADD_CHARSET_DATA (charbuf, last_offset, char_offset, last_id);
4727   coding->consumed_char += consumed_chars_base;
4728   coding->consumed = src_base - coding->source;
4729   coding->charbuf_used = charbuf - coding->charbuf;
4730 }
4731
4732 static int
4733 encode_coding_charset (coding)
4734      struct coding_system *coding;
4735 {
4736   int multibytep = coding->dst_multibyte;
4737   int *charbuf = coding->charbuf;
4738   int *charbuf_end = charbuf + coding->charbuf_used;
4739   unsigned char *dst = coding->destination + coding->produced;
4740   unsigned char *dst_end = coding->destination + coding->dst_bytes;
4741   int safe_room = MAX_MULTIBYTE_LENGTH;
4742   int produced_chars = 0;
4743   Lisp_Object attrs, eol_type, charset_list;
4744   int ascii_compatible;
4745   int c;
4746
4747   CODING_GET_INFO (coding, attrs, eol_type, charset_list);
4748   ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
4749
4750   while (charbuf < charbuf_end)
4751     {
4752       struct charset *charset;
4753       unsigned code;
4754
4755       ASSURE_DESTINATION (safe_room);
4756       c = *charbuf++;
4757       if (ascii_compatible && ASCII_CHAR_P (c))
4758         EMIT_ONE_ASCII_BYTE (c);
4759       else if (CHAR_BYTE8_P (c))
4760         {
4761           c = CHAR_TO_BYTE8 (c);
4762           EMIT_ONE_BYTE (c);
4763         }
4764       else
4765         {
4766           charset = char_charset (c, charset_list, &code);
4767           if (charset)
4768             {
4769               if (CHARSET_DIMENSION (charset) == 1)
4770                 EMIT_ONE_BYTE (code);
4771               else if (CHARSET_DIMENSION (charset) == 2)
4772                 EMIT_TWO_BYTES (code >> 8, code & 0xFF);
4773               else if (CHARSET_DIMENSION (charset) == 3)
4774                 EMIT_THREE_BYTES (code >> 16, (code >> 8) & 0xFF, code & 0xFF);
4775               else
4776                 EMIT_FOUR_BYTES (code >> 24, (code >> 16) & 0xFF,
4777                                  (code >> 8) & 0xFF, code & 0xFF);
4778             }
4779           else
4780             {
4781               if (coding->mode & CODING_MODE_SAFE_ENCODING)
4782                 c = CODING_INHIBIT_CHARACTER_SUBSTITUTION;
4783               else
4784                 c = coding->default_char;
4785               EMIT_ONE_BYTE (c);
4786             }
4787         }
4788     }
4789
4790   coding->result = CODING_RESULT_SUCCESS;
4791   coding->produced_char += produced_chars;
4792   coding->produced = dst - coding->destination;
4793   return 0;
4794 }
4795
4796 \f
4797 /*** 7. C library functions ***/
4798
4799 /* Setup coding context CODING from information about CODING_SYSTEM.
4800    If CODING_SYSTEM is nil, `no-conversion' is assumed.  If
4801    CODING_SYSTEM is invalid, signal an error.  */
4802
4803 void
4804 setup_coding_system (coding_system, coding)
4805      Lisp_Object coding_system;
4806      struct coding_system *coding;
4807 {
4808   Lisp_Object attrs;
4809   Lisp_Object eol_type;
4810   Lisp_Object coding_type;
4811   Lisp_Object val;
4812
4813   if (NILP (coding_system))
4814     coding_system = Qno_conversion;
4815
4816   CHECK_CODING_SYSTEM_GET_ID (coding_system, coding->id);
4817
4818   attrs = CODING_ID_ATTRS (coding->id);
4819   eol_type = CODING_ID_EOL_TYPE (coding->id);
4820
4821   coding->mode = 0;
4822   coding->head_ascii = -1;
4823   coding->common_flags
4824     = (VECTORP (eol_type) ? CODING_REQUIRE_DETECTION_MASK : 0);
4825   if (! NILP (CODING_ATTR_POST_READ (attrs)))
4826     coding->common_flags |= CODING_REQUIRE_DECODING_MASK;
4827   if (! NILP (CODING_ATTR_PRE_WRITE (attrs)))
4828     coding->common_flags |= CODING_REQUIRE_ENCODING_MASK;
4829   if (! NILP (CODING_ATTR_FOR_UNIBYTE (attrs)))
4830     coding->common_flags |= CODING_FOR_UNIBYTE_MASK;
4831
4832   val = CODING_ATTR_SAFE_CHARSETS (attrs);
4833   coding->max_charset_id = SCHARS (val) - 1;
4834   coding->safe_charsets = (char *) SDATA (val);
4835   coding->default_char = XINT (CODING_ATTR_DEFAULT_CHAR (attrs));
4836
4837   coding_type = CODING_ATTR_TYPE (attrs);
4838   if (EQ (coding_type, Qundecided))
4839     {
4840       coding->detector = NULL;
4841       coding->decoder = decode_coding_raw_text;
4842       coding->encoder = encode_coding_raw_text;
4843       coding->common_flags |= CODING_REQUIRE_DETECTION_MASK;
4844     }
4845   else if (EQ (coding_type, Qiso_2022))
4846     {
4847       int i;
4848       int flags = XINT (AREF (attrs, coding_attr_iso_flags));
4849
4850       /* Invoke graphic register 0 to plane 0.  */
4851       CODING_ISO_INVOCATION (coding, 0) = 0;
4852       /* Invoke graphic register 1 to plane 1 if we can use 8-bit.  */
4853       CODING_ISO_INVOCATION (coding, 1)
4854         = (flags & CODING_ISO_FLAG_SEVEN_BITS ? -1 : 1);
4855       /* Setup the initial status of designation.  */
4856       for (i = 0; i < 4; i++)
4857         CODING_ISO_DESIGNATION (coding, i) = CODING_ISO_INITIAL (coding, i);
4858       /* Not single shifting initially.  */
4859       CODING_ISO_SINGLE_SHIFTING (coding) = 0;
4860       /* Beginning of buffer should also be regarded as bol. */
4861       CODING_ISO_BOL (coding) = 1;
4862       coding->detector = detect_coding_iso_2022;
4863       coding->decoder = decode_coding_iso_2022;
4864       coding->encoder = encode_coding_iso_2022;
4865       if (flags & CODING_ISO_FLAG_SAFE)
4866         coding->mode |= CODING_MODE_SAFE_ENCODING;
4867       coding->common_flags
4868         |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK
4869             | CODING_REQUIRE_FLUSHING_MASK);
4870       if (flags & CODING_ISO_FLAG_COMPOSITION)
4871         coding->common_flags |= CODING_ANNOTATE_COMPOSITION_MASK;
4872       if (flags & CODING_ISO_FLAG_DESIGNATION)
4873         coding->common_flags |= CODING_ANNOTATE_CHARSET_MASK;
4874       if (flags & CODING_ISO_FLAG_FULL_SUPPORT)
4875         {
4876           setup_iso_safe_charsets (attrs);
4877           val = CODING_ATTR_SAFE_CHARSETS (attrs);
4878           coding->max_charset_id = SCHARS (val) - 1;
4879           coding->safe_charsets = (char *) SDATA (val);
4880         }
4881       CODING_ISO_FLAGS (coding) = flags;
4882     }
4883   else if (EQ (coding_type, Qcharset))
4884     {
4885       coding->detector = detect_coding_charset;
4886       coding->decoder = decode_coding_charset;
4887       coding->encoder = encode_coding_charset;
4888       coding->common_flags
4889         |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
4890     }
4891   else if (EQ (coding_type, Qutf_8))
4892     {
4893       coding->detector = detect_coding_utf_8;
4894       coding->decoder = decode_coding_utf_8;
4895       coding->encoder = encode_coding_utf_8;
4896       coding->common_flags
4897         |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
4898     }
4899   else if (EQ (coding_type, Qutf_16))
4900     {
4901       val = AREF (attrs, coding_attr_utf_16_bom);
4902       CODING_UTF_16_BOM (coding) = (CONSP (val) ? utf_16_detect_bom
4903                                     : EQ (val, Qt) ? utf_16_with_bom
4904                                     : utf_16_without_bom);
4905       val = AREF (attrs, coding_attr_utf_16_endian);
4906       CODING_UTF_16_ENDIAN (coding) = (EQ (val, Qbig) ? utf_16_big_endian
4907                                        : utf_16_little_endian);
4908       CODING_UTF_16_SURROGATE (coding) = 0;
4909       coding->detector = detect_coding_utf_16;
4910       coding->decoder = decode_coding_utf_16;
4911       coding->encoder = encode_coding_utf_16;
4912       coding->common_flags
4913         |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
4914       if (CODING_UTF_16_BOM (coding) == utf_16_detect_bom)
4915         coding->common_flags |= CODING_REQUIRE_DETECTION_MASK;
4916     }
4917   else if (EQ (coding_type, Qccl))
4918     {
4919       coding->detector = detect_coding_ccl;
4920       coding->decoder = decode_coding_ccl;
4921       coding->encoder = encode_coding_ccl;
4922       coding->common_flags
4923         |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK
4924             | CODING_REQUIRE_FLUSHING_MASK);
4925     }
4926   else if (EQ (coding_type, Qemacs_mule))
4927     {
4928       coding->detector = detect_coding_emacs_mule;
4929       coding->decoder = decode_coding_emacs_mule;
4930       coding->encoder = encode_coding_emacs_mule;
4931       coding->common_flags
4932         |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
4933       if (! NILP (AREF (attrs, coding_attr_emacs_mule_full))
4934           && ! EQ (CODING_ATTR_CHARSET_LIST (attrs), Vemacs_mule_charset_list))
4935         {
4936           Lisp_Object tail, safe_charsets;
4937           int max_charset_id = 0;
4938
4939           for (tail = Vemacs_mule_charset_list; CONSP (tail);
4940                tail = XCDR (tail))
4941             if (max_charset_id < XFASTINT (XCAR (tail)))
4942               max_charset_id = XFASTINT (XCAR (tail));
4943           safe_charsets = Fmake_string (make_number (max_charset_id + 1),
4944                                         make_number (255));
4945           for (tail = Vemacs_mule_charset_list; CONSP (tail);
4946                tail = XCDR (tail))
4947             SSET (safe_charsets, XFASTINT (XCAR (tail)), 0);
4948           coding->max_charset_id = max_charset_id;
4949           coding->safe_charsets = (char *) SDATA (safe_charsets);
4950         }
4951     }
4952   else if (EQ (coding_type, Qshift_jis))
4953     {
4954       coding->detector = detect_coding_sjis;
4955       coding->decoder = decode_coding_sjis;
4956       coding->encoder = encode_coding_sjis;
4957       coding->common_flags
4958         |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
4959     }
4960   else if (EQ (coding_type, Qbig5))
4961     {
4962       coding->detector = detect_coding_big5;
4963       coding->decoder = decode_coding_big5;
4964       coding->encoder = encode_coding_big5;
4965       coding->common_flags
4966         |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
4967     }
4968   else                          /* EQ (coding_type, Qraw_text) */
4969     {
4970       coding->detector = NULL;
4971       coding->decoder = decode_coding_raw_text;
4972       coding->encoder = encode_coding_raw_text;
4973     }
4974
4975   return;
4976 }
4977
4978 /* Return raw-text or one of its subsidiaries that has the same
4979    eol_type as CODING-SYSTEM.  */
4980
4981 Lisp_Object
4982 raw_text_coding_system (coding_system)
4983      Lisp_Object coding_system;
4984 {
4985   Lisp_Object spec, attrs;
4986   Lisp_Object eol_type, raw_text_eol_type;
4987
4988   if (NILP (coding_system))
4989     return Qraw_text;
4990   spec = CODING_SYSTEM_SPEC (coding_system);
4991   attrs = AREF (spec, 0);
4992
4993   if (EQ (CODING_ATTR_TYPE (attrs), Qraw_text))
4994     return coding_system;
4995
4996   eol_type = AREF (spec, 2);
4997   if (VECTORP (eol_type))
4998     return Qraw_text;
4999   spec = CODING_SYSTEM_SPEC (Qraw_text);
5000   raw_text_eol_type = AREF (spec, 2);
5001   return (EQ (eol_type, Qunix) ? AREF (raw_text_eol_type, 0)
5002           : EQ (eol_type, Qdos) ? AREF (raw_text_eol_type, 1)
5003           : AREF (raw_text_eol_type, 2));
5004 }
5005
5006
5007 /* If CODING_SYSTEM doesn't specify end-of-line format but PARENT
5008    does, return one of the subsidiary that has the same eol-spec as
5009    PARENT.  Otherwise, return CODING_SYSTEM.  */
5010
5011 Lisp_Object
5012 coding_inherit_eol_type (coding_system, parent)
5013      Lisp_Object coding_system, parent;
5014 {
5015   Lisp_Object spec, eol_type;
5016
5017   if (NILP (coding_system))
5018     coding_system = Qraw_text;
5019   spec = CODING_SYSTEM_SPEC (coding_system);
5020   eol_type = AREF (spec, 2);
5021   if (VECTORP (eol_type)
5022       && ! NILP (parent))
5023     {
5024       Lisp_Object parent_spec;
5025       Lisp_Object parent_eol_type;
5026
5027       parent_spec
5028         = CODING_SYSTEM_SPEC (buffer_defaults.buffer_file_coding_system);
5029       parent_eol_type = AREF (parent_spec, 2);
5030       if (EQ (parent_eol_type, Qunix))
5031         coding_system = AREF (eol_type, 0);
5032       else if (EQ (parent_eol_type, Qdos))
5033         coding_system = AREF (eol_type, 1);
5034       else if (EQ (parent_eol_type, Qmac))
5035         coding_system = AREF (eol_type, 2);
5036     }
5037   return coding_system;
5038 }
5039
5040 /* Emacs has a mechanism to automatically detect a coding system if it
5041    is one of Emacs' internal format, ISO2022, SJIS, and BIG5.  But,
5042    it's impossible to distinguish some coding systems accurately
5043    because they use the same range of codes.  So, at first, coding
5044    systems are categorized into 7, those are:
5045
5046    o coding-category-emacs-mule
5047
5048         The category for a coding system which has the same code range
5049         as Emacs' internal format.  Assigned the coding-system (Lisp
5050         symbol) `emacs-mule' by default.
5051
5052    o coding-category-sjis
5053
5054         The category for a coding system which has the same code range
5055         as SJIS.  Assigned the coding-system (Lisp
5056         symbol) `japanese-shift-jis' by default.
5057
5058    o coding-category-iso-7
5059
5060         The category for a coding system which has the same code range
5061         as ISO2022 of 7-bit environment.  This doesn't use any locking
5062         shift and single shift functions.  This can encode/decode all
5063         charsets.  Assigned the coding-system (Lisp symbol)
5064         `iso-2022-7bit' by default.
5065
5066    o coding-category-iso-7-tight
5067
5068         Same as coding-category-iso-7 except that this can
5069         encode/decode only the specified charsets.
5070
5071    o coding-category-iso-8-1
5072
5073         The category for a coding system which has the same code range
5074         as ISO2022 of 8-bit environment and graphic plane 1 used only
5075         for DIMENSION1 charset.  This doesn't use any locking shift
5076         and single shift functions.  Assigned the coding-system (Lisp
5077         symbol) `iso-latin-1' by default.
5078
5079    o coding-category-iso-8-2
5080
5081         The category for a coding system which has the same code range
5082         as ISO2022 of 8-bit environment and graphic plane 1 used only
5083         for DIMENSION2 charset.  This doesn't use any locking shift
5084         and single shift functions.  Assigned the coding-system (Lisp
5085         symbol) `japanese-iso-8bit' by default.
5086
5087    o coding-category-iso-7-else
5088
5089         The category for a coding system which has the same code range
5090         as ISO2022 of 7-bit environemnt but uses locking shift or
5091         single shift functions.  Assigned the coding-system (Lisp
5092         symbol) `iso-2022-7bit-lock' by default.
5093
5094    o coding-category-iso-8-else
5095
5096         The category for a coding system which has the same code range
5097         as ISO2022 of 8-bit environemnt but uses locking shift or
5098         single shift functions.  Assigned the coding-system (Lisp
5099         symbol) `iso-2022-8bit-ss2' by default.
5100
5101    o coding-category-big5
5102
5103         The category for a coding system which has the same code range
5104         as BIG5.  Assigned the coding-system (Lisp symbol)
5105         `cn-big5' by default.
5106
5107    o coding-category-utf-8
5108
5109         The category for a coding system which has the same code range
5110         as UTF-8 (cf. RFC2279).  Assigned the coding-system (Lisp
5111         symbol) `utf-8' by default.
5112
5113    o coding-category-utf-16-be
5114
5115         The category for a coding system in which a text has an
5116         Unicode signature (cf. Unicode Standard) in the order of BIG
5117         endian at the head.  Assigned the coding-system (Lisp symbol)
5118         `utf-16-be' by default.
5119
5120    o coding-category-utf-16-le
5121
5122         The category for a coding system in which a text has an
5123         Unicode signature (cf. Unicode Standard) in the order of
5124         LITTLE endian at the head.  Assigned the coding-system (Lisp
5125         symbol) `utf-16-le' by default.
5126
5127    o coding-category-ccl
5128
5129         The category for a coding system of which encoder/decoder is
5130         written in CCL programs.  The default value is nil, i.e., no
5131         coding system is assigned.
5132
5133    o coding-category-binary
5134
5135         The category for a coding system not categorized in any of the
5136         above.  Assigned the coding-system (Lisp symbol)
5137         `no-conversion' by default.
5138
5139    Each of them is a Lisp symbol and the value is an actual
5140    `coding-system's (this is also a Lisp symbol) assigned by a user.
5141    What Emacs does actually is to detect a category of coding system.
5142    Then, it uses a `coding-system' assigned to it.  If Emacs can't
5143    decide only one possible category, it selects a category of the
5144    highest priority.  Priorities of categories are also specified by a
5145    user in a Lisp variable `coding-category-list'.
5146
5147 */
5148
5149 #define EOL_SEEN_NONE   0
5150 #define EOL_SEEN_LF     1
5151 #define EOL_SEEN_CR     2
5152 #define EOL_SEEN_CRLF   4
5153
5154 /* Detect how end-of-line of a text of length SRC_BYTES pointed by
5155    SOURCE is encoded.  If CATEGORY is one of
5156    coding_category_utf_16_XXXX, assume that CR and LF are encoded by
5157    two-byte, else they are encoded by one-byte.
5158
5159    Return one of EOL_SEEN_XXX.  */
5160
5161 #define MAX_EOL_CHECK_COUNT 3
5162
5163 static int
5164 detect_eol (source, src_bytes, category)
5165      unsigned char *source;
5166      EMACS_INT src_bytes;
5167      enum coding_category category;
5168 {
5169   unsigned char *src = source, *src_end = src + src_bytes;
5170   unsigned char c;
5171   int total  = 0;
5172   int eol_seen = EOL_SEEN_NONE;
5173
5174   if ((1 << category) & CATEGORY_MASK_UTF_16)
5175     {
5176       int msb, lsb;
5177
5178       msb = category == (coding_category_utf_16_le
5179                          | coding_category_utf_16_le_nosig);
5180       lsb = 1 - msb;
5181
5182       while (src + 1 < src_end)
5183         {
5184           c = src[lsb];
5185           if (src[msb] == 0 && (c == '\n' || c == '\r'))
5186             {
5187               int this_eol;
5188
5189               if (c == '\n')
5190                 this_eol = EOL_SEEN_LF;
5191               else if (src + 3 >= src_end
5192                        || src[msb + 2] != 0
5193                        || src[lsb + 2] != '\n')
5194                 this_eol = EOL_SEEN_CR;
5195               else
5196                 this_eol = EOL_SEEN_CRLF;
5197
5198               if (eol_seen == EOL_SEEN_NONE)
5199                 /* This is the first end-of-line.  */
5200                 eol_seen = this_eol;
5201               else if (eol_seen != this_eol)
5202                 {
5203                   /* The found type is different from what found before.  */
5204                   eol_seen = EOL_SEEN_LF;
5205                   break;
5206                 }
5207               if (++total == MAX_EOL_CHECK_COUNT)
5208                 break;
5209             }
5210           src += 2;
5211         }
5212     }
5213   else
5214     {
5215       while (src < src_end)
5216         {
5217           c = *src++;
5218           if (c == '\n' || c == '\r')
5219             {
5220               int this_eol;
5221
5222               if (c == '\n')
5223                 this_eol = EOL_SEEN_LF;
5224               else if (src >= src_end || *src != '\n')
5225                 this_eol = EOL_SEEN_CR;
5226               else
5227                 this_eol = EOL_SEEN_CRLF, src++;
5228
5229               if (eol_seen == EOL_SEEN_NONE)
5230                 /* This is the first end-of-line.  */
5231                 eol_seen = this_eol;
5232               else if (eol_seen != this_eol)
5233                 {
5234                   /* The found type is different from what found before.  */
5235                   eol_seen = EOL_SEEN_LF;
5236                   break;
5237                 }
5238               if (++total == MAX_EOL_CHECK_COUNT)
5239                 break;
5240             }
5241         }
5242     }
5243   return eol_seen;
5244 }
5245
5246
5247 static void
5248 adjust_coding_eol_type (coding, eol_seen)
5249      struct coding_system *coding;
5250      int eol_seen;
5251 {
5252   Lisp_Object eol_type;
5253
5254   eol_type = CODING_ID_EOL_TYPE (coding->id);
5255   if (eol_seen & EOL_SEEN_LF)
5256     coding->id = CODING_SYSTEM_ID (AREF (eol_type, 0));
5257   else if (eol_seen & EOL_SEEN_CRLF)
5258     coding->id = CODING_SYSTEM_ID (AREF (eol_type, 1));
5259   else if (eol_seen & EOL_SEEN_CR)
5260     coding->id = CODING_SYSTEM_ID (AREF (eol_type, 2));
5261 }
5262
5263 /* Detect how a text specified in CODING is encoded.  If a coding
5264    system is detected, update fields of CODING by the detected coding
5265    system.  */
5266
5267 void
5268 detect_coding (coding)
5269      struct coding_system *coding;
5270 {
5271   const unsigned char *src, *src_end;
5272   Lisp_Object attrs, coding_type;
5273
5274   coding->consumed = coding->consumed_char = 0;
5275   coding->produced = coding->produced_char = 0;
5276   coding_set_source (coding);
5277
5278   src_end = coding->source + coding->src_bytes;
5279
5280   /* If we have not yet decided the text encoding type, detect it
5281      now.  */
5282   if (EQ (CODING_ATTR_TYPE (CODING_ID_ATTRS (coding->id)), Qundecided))
5283     {
5284       int c, i;
5285
5286       for (src = coding->source; src < src_end; src++)
5287         {
5288           c = *src;
5289           if (c & 0x80 || (c < 0x20 && (c == ISO_CODE_ESC
5290                                         || c == ISO_CODE_SI
5291                                         || c == ISO_CODE_SO)))
5292             break;
5293         }
5294       coding->head_ascii = src - (coding->source + coding->consumed);
5295
5296       if (coding->head_ascii < coding->src_bytes)
5297         {
5298           struct coding_detection_info detect_info;
5299           enum coding_category category;
5300           struct coding_system *this;
5301
5302           detect_info.checked = detect_info.found = detect_info.rejected = 0;
5303           for (i = 0; i < coding_category_raw_text; i++)
5304             {
5305               category = coding_priorities[i];
5306               this = coding_categories + category;
5307               if (this->id < 0)
5308                 {
5309                   /* No coding system of this category is defined.  */
5310                   detect_info.rejected |= (1 << category);
5311                 }
5312               else if (category >= coding_category_raw_text)
5313                 continue;
5314               else if (detect_info.checked & (1 << category))
5315                 {
5316                   if (detect_info.found & (1 << category))
5317                     break;
5318                 }
5319               else if ((*(this->detector)) (coding, &detect_info)
5320                        && detect_info.found & (1 << category))
5321                 break;
5322             }
5323           if (i < coding_category_raw_text)
5324             setup_coding_system (CODING_ID_NAME (this->id), coding);
5325           else if (detect_info.rejected == CATEGORY_MASK_ANY)
5326             setup_coding_system (Qraw_text, coding);
5327           else if (detect_info.rejected)
5328             for (i = 0; i < coding_category_raw_text; i++)
5329               if (! (detect_info.rejected & (1 << coding_priorities[i])))
5330                 {
5331                   this = coding_categories + coding_priorities[i];
5332                   setup_coding_system (CODING_ID_NAME (this->id), coding);
5333                   break;
5334                 }
5335         }
5336     }
5337   else if (EQ (CODING_ATTR_TYPE (CODING_ID_ATTRS (coding->id)), Qutf_16))
5338     {
5339       Lisp_Object coding_systems;
5340       struct coding_detection_info detect_info;
5341
5342       coding_systems
5343         = AREF (CODING_ID_ATTRS (coding->id), coding_attr_utf_16_bom);
5344       detect_info.found = detect_info.rejected = 0;
5345       if (CONSP (coding_systems)
5346           && detect_coding_utf_16 (coding, &detect_info)
5347           && (detect_info.found & (CATEGORY_MASK_UTF_16_LE
5348                                    | CATEGORY_MASK_UTF_16_BE)))
5349         {
5350           if (detect_info.found & CATEGORY_MASK_UTF_16_LE)
5351             setup_coding_system (XCAR (coding_systems), coding);
5352           else
5353             setup_coding_system (XCDR (coding_systems), coding);
5354         }
5355     }
5356
5357   attrs = CODING_ID_ATTRS (coding->id);
5358   coding_type = CODING_ATTR_TYPE (attrs);
5359
5360   /* If we have not yet decided the EOL type, detect it now.  But, the
5361      detection is impossible for a CCL based coding system, in which
5362      case, we detct the EOL type after decoding.  */
5363   if (VECTORP (CODING_ID_EOL_TYPE (coding->id))
5364       && ! EQ (coding_type, Qccl))
5365     {
5366       int eol_seen = detect_eol (coding->source, coding->src_bytes,
5367                                  (enum coding_category) XINT (CODING_ATTR_CATEGORY (attrs)));
5368
5369       if (eol_seen != EOL_SEEN_NONE)
5370         adjust_coding_eol_type (coding, eol_seen);
5371     }
5372 }
5373
5374
5375 static void
5376 decode_eol (coding)
5377      struct coding_system *coding;
5378 {
5379   if (VECTORP (CODING_ID_EOL_TYPE (coding->id)))
5380     {
5381       unsigned char *p = CHAR_POS_ADDR (coding->dst_pos);
5382       unsigned char *pend = p + coding->produced;
5383       int eol_seen = EOL_SEEN_NONE;
5384
5385       for (; p < pend; p++)
5386         {
5387           if (*p == '\n')
5388             eol_seen |= EOL_SEEN_LF;
5389           else if (*p == '\r')
5390             {
5391               if (p + 1 < pend && *(p + 1) == '\n')
5392                 {
5393                   eol_seen |= EOL_SEEN_CRLF;
5394                   p++;
5395                 }
5396               else
5397                 eol_seen |= EOL_SEEN_CR;
5398             }
5399         }
5400       if (eol_seen != EOL_SEEN_NONE)
5401         adjust_coding_eol_type (coding, eol_seen);
5402     }
5403
5404   if (EQ (CODING_ID_EOL_TYPE (coding->id), Qmac))
5405     {
5406       unsigned char *p = CHAR_POS_ADDR (coding->dst_pos);
5407       unsigned char *pend = p + coding->produced;
5408
5409       for (; p < pend; p++)
5410         if (*p == '\r')
5411           *p = '\n';
5412     }
5413   else if (EQ (CODING_ID_EOL_TYPE (coding->id), Qdos))
5414     {
5415       unsigned char *p, *pbeg, *pend;
5416       Lisp_Object undo_list;
5417
5418       move_gap_both (coding->dst_pos + coding->produced_char,
5419                      coding->dst_pos_byte + coding->produced);
5420       undo_list = current_buffer->undo_list;
5421       current_buffer->undo_list = Qt;
5422       del_range_2 (coding->dst_pos, coding->dst_pos_byte, GPT, GPT_BYTE, 0);
5423       current_buffer->undo_list = undo_list;
5424       pbeg = GPT_ADDR;
5425       pend = pbeg + coding->produced;
5426
5427       for (p = pend - 1; p >= pbeg; p--)
5428         if (*p == '\r')
5429           {
5430             safe_bcopy ((char *) (p + 1), (char *) p, pend - p - 1);
5431             pend--;
5432           }
5433       coding->produced_char -= coding->produced - (pend - pbeg);
5434       coding->produced = pend - pbeg;
5435       insert_from_gap (coding->produced_char, coding->produced);
5436     }
5437 }
5438
5439 static void
5440 translate_chars (coding, table)
5441      struct coding_system *coding;
5442      Lisp_Object table;
5443 {
5444   int *charbuf = coding->charbuf;
5445   int *charbuf_end = charbuf + coding->charbuf_used;
5446   int c;
5447
5448   if (coding->chars_at_source)
5449     return;
5450
5451   while (charbuf < charbuf_end)
5452     {
5453       c = *charbuf;
5454       if (c < 0)
5455         charbuf += c;
5456       else
5457         *charbuf++ = translate_char (table, c);
5458     }
5459 }
5460
5461 static int
5462 produce_chars (coding)
5463      struct coding_system *coding;
5464 {
5465   unsigned char *dst = coding->destination + coding->produced;
5466   unsigned char *dst_end = coding->destination + coding->dst_bytes;
5467   int produced;
5468   int produced_chars = 0;
5469
5470   if (! coding->chars_at_source)
5471     {
5472       /* Characters are in coding->charbuf.  */
5473       EMACS_INT *buf = coding->charbuf;
5474       EMACS_INT *buf_end = buf + coding->charbuf_used;
5475       unsigned char *adjusted_dst_end;
5476
5477       if (BUFFERP (coding->src_object)
5478           && EQ (coding->src_object, coding->dst_object))
5479         dst_end = ((unsigned char *) coding->source) + coding->consumed;
5480       adjusted_dst_end = dst_end - MAX_MULTIBYTE_LENGTH;
5481
5482       while (buf < buf_end)
5483         {
5484           int c = *buf++;
5485
5486           if (dst >= adjusted_dst_end)
5487             {
5488               dst = alloc_destination (coding,
5489                                        buf_end - buf + MAX_MULTIBYTE_LENGTH,
5490                                        dst);
5491               dst_end = coding->destination + coding->dst_bytes;
5492               adjusted_dst_end = dst_end - MAX_MULTIBYTE_LENGTH;
5493             }
5494           if (c >= 0)
5495             {
5496               if (coding->dst_multibyte
5497                   || ! CHAR_BYTE8_P (c))
5498                 CHAR_STRING_ADVANCE (c, dst);
5499               else
5500                 *dst++ = CHAR_TO_BYTE8 (c);
5501               produced_chars++;
5502             }
5503           else
5504             /* This is an annotation datum.  (-C) is the length of
5505                it.  */
5506             buf += -c - 1;
5507         }
5508     }
5509   else
5510     {
5511       const unsigned char *src = coding->source;
5512       const unsigned char *src_end = src + coding->src_bytes;
5513       Lisp_Object eol_type;
5514
5515       eol_type = CODING_ID_EOL_TYPE (coding->id);
5516
5517       if (coding->src_multibyte != coding->dst_multibyte)
5518         {
5519           if (coding->src_multibyte)
5520             {
5521               int multibytep = 1;
5522               int consumed_chars;
5523
5524               while (1)
5525                 {
5526                   const unsigned char *src_base = src;
5527                   int c;
5528
5529                   ONE_MORE_BYTE (c);
5530                   if (c == '\r')
5531                     {
5532                       if (EQ (eol_type, Qdos))
5533                         {
5534                           if (src == src_end)
5535                             {
5536                               coding->result = CODING_RESULT_INSUFFICIENT_SRC;
5537                               goto no_more_source;
5538                             }
5539                           if (*src == '\n')
5540                             c = *src++;
5541                         }
5542                       else if (EQ (eol_type, Qmac))
5543                         c = '\n';
5544                     }
5545                   if (dst == dst_end)
5546                     {
5547                       coding->consumed = src - coding->source;
5548
5549                     if (EQ (coding->src_object, coding->dst_object))
5550                       dst_end = (unsigned char *) src;
5551                     if (dst == dst_end)
5552                       {
5553                         dst = alloc_destination (coding, src_end - src + 1,
5554                                                  dst);
5555                         dst_end = coding->destination + coding->dst_bytes;
5556                         coding_set_source (coding);
5557                         src = coding->source + coding->consumed;
5558                         src_end = coding->source + coding->src_bytes;
5559                       }
5560                     }
5561                   *dst++ = c;
5562                   produced_chars++;
5563                 }
5564             no_more_source:
5565               ;
5566             }
5567           else
5568             while (src < src_end)
5569               {
5570                 int multibytep = 1;
5571                 int c = *src++;
5572
5573                 if (c == '\r')
5574                   {
5575                     if (EQ (eol_type, Qdos))
5576                       {
5577                         if (src < src_end
5578                             && *src == '\n')
5579                           c = *src++;
5580                       }
5581                     else if (EQ (eol_type, Qmac))
5582                       c = '\n';
5583                   }
5584                 if (dst >= dst_end - 1)
5585                   {
5586                     coding->consumed = src - coding->source;
5587
5588                     if (EQ (coding->src_object, coding->dst_object))
5589                       dst_end = (unsigned char *) src;
5590                     if (dst >= dst_end - 1)
5591                       {
5592                         dst = alloc_destination (coding, src_end - src + 2,
5593                                                  dst);
5594                         dst_end = coding->destination + coding->dst_bytes;
5595                         coding_set_source (coding);
5596                         src = coding->source + coding->consumed;
5597                         src_end = coding->source + coding->src_bytes;
5598                       }
5599                   }
5600                 EMIT_ONE_BYTE (c);
5601               }
5602         }
5603       else
5604         {
5605           if (!EQ (coding->src_object, coding->dst_object))
5606             {
5607               int require = coding->src_bytes - coding->dst_bytes;
5608
5609               if (require > 0)
5610                 {
5611                   EMACS_INT offset = src - coding->source;
5612
5613                   dst = alloc_destination (coding, require, dst);
5614                   coding_set_source (coding);
5615                   src = coding->source + offset;
5616                   src_end = coding->source + coding->src_bytes;
5617                 }
5618             }
5619           produced_chars = coding->src_chars;
5620           while (src < src_end)
5621             {
5622               int c = *src++;
5623
5624               if (c == '\r')
5625                 {
5626                   if (EQ (eol_type, Qdos))
5627                     {
5628                       if (src < src_end
5629                           && *src == '\n')
5630                         c = *src++;
5631                       produced_chars--;
5632                     }
5633                   else if (EQ (eol_type, Qmac))
5634                     c = '\n';
5635                 }
5636               *dst++ = c;
5637             }
5638         }
5639       coding->consumed = coding->src_bytes;
5640       coding->consumed_char = coding->src_chars;
5641     }
5642
5643   produced = dst - (coding->destination + coding->produced);
5644   if (BUFFERP (coding->dst_object))
5645     insert_from_gap (produced_chars, produced);
5646   coding->produced += produced;
5647   coding->produced_char += produced_chars;
5648   return produced_chars;
5649 }
5650
5651 /* Compose text in CODING->object according to the annotation data at
5652    CHARBUF.  CHARBUF is an array:
5653      [ -LENGTH ANNOTATION_MASK FROM TO METHOD COMP_LEN [ COMPONENTS... ] ]
5654  */
5655
5656 static INLINE void
5657 produce_composition (coding, charbuf)
5658      struct coding_system *coding;
5659      int *charbuf;
5660 {
5661   int len;
5662   EMACS_INT from, to;
5663   enum composition_method method;
5664   Lisp_Object components;
5665
5666   len = -charbuf[0];
5667   from = coding->dst_pos + charbuf[2];
5668   to = coding->dst_pos + charbuf[3];
5669   method = (enum composition_method) (charbuf[4]);
5670
5671   if (method == COMPOSITION_RELATIVE)
5672     components = Qnil;
5673   else
5674     {
5675       Lisp_Object args[MAX_COMPOSITION_COMPONENTS * 2 - 1];
5676       int i;
5677
5678       len -= 5;
5679       charbuf += 5;
5680       for (i = 0; i < len; i++)
5681         args[i] = make_number (charbuf[i]);
5682       components = (method == COMPOSITION_WITH_ALTCHARS
5683                     ? Fstring (len, args) : Fvector (len, args));
5684     }
5685   compose_text (from, to, components, Qnil, coding->dst_object);
5686 }
5687
5688
5689 /* Put `charset' property on text in CODING->object according to
5690    the annotation data at CHARBUF.  CHARBUF is an array:
5691      [ -LENGTH ANNOTATION_MASK FROM TO CHARSET-ID ]
5692  */
5693
5694 static INLINE void
5695 produce_charset (coding, charbuf)
5696      struct coding_system *coding;
5697      int *charbuf;
5698 {
5699   EMACS_INT from = coding->dst_pos + charbuf[2];
5700   EMACS_INT to = coding->dst_pos + charbuf[3];
5701   struct charset *charset = CHARSET_FROM_ID (charbuf[4]);
5702
5703   Fput_text_property (make_number (from), make_number (to),
5704                       Qcharset, CHARSET_NAME (charset),
5705                       coding->dst_object);
5706 }
5707
5708
5709 #define CHARBUF_SIZE 0x4000
5710
5711 #define ALLOC_CONVERSION_WORK_AREA(coding)                              \
5712   do {                                                                  \
5713     int size = CHARBUF_SIZE;;                                           \
5714                                                                         \
5715     coding->charbuf = NULL;                                             \
5716     while (size > 1024)                                                 \
5717       {                                                                 \
5718         coding->charbuf = (int *) alloca (sizeof (int) * size);         \
5719         if (coding->charbuf)                                            \
5720           break;                                                        \
5721         size >>= 1;                                                     \
5722       }                                                                 \
5723     if (! coding->charbuf)                                              \
5724       {                                                                 \
5725         coding->result = CODING_RESULT_INSUFFICIENT_MEM;                \
5726         return coding->result;                                          \
5727       }                                                                 \
5728     coding->charbuf_size = size;                                        \
5729   } while (0)
5730
5731
5732 static void
5733 produce_annotation (coding)
5734      struct coding_system *coding;
5735 {
5736   int *charbuf = coding->charbuf;
5737   int *charbuf_end = charbuf + coding->charbuf_used;
5738
5739   if (NILP (coding->dst_object))
5740     return;
5741
5742   while (charbuf < charbuf_end)
5743     {
5744       if (*charbuf >= 0)
5745         charbuf++;
5746       else
5747         {
5748           int len = -*charbuf;
5749           switch (charbuf[1])
5750             {
5751             case CODING_ANNOTATE_COMPOSITION_MASK:
5752               produce_composition (coding, charbuf);
5753               break;
5754             case CODING_ANNOTATE_CHARSET_MASK:
5755               produce_charset (coding, charbuf);
5756               break;
5757             default:
5758               abort ();
5759             }
5760           charbuf += len;
5761         }
5762     }
5763 }
5764
5765 /* Decode the data at CODING->src_object into CODING->dst_object.
5766    CODING->src_object is a buffer, a string, or nil.
5767    CODING->dst_object is a buffer.
5768
5769    If CODING->src_object is a buffer, it must be the current buffer.
5770    In this case, if CODING->src_pos is positive, it is a position of
5771    the source text in the buffer, otherwise, the source text is in the
5772    gap area of the buffer, and CODING->src_pos specifies the offset of
5773    the text from GPT (which must be the same as PT).  If this is the
5774    same buffer as CODING->dst_object, CODING->src_pos must be
5775    negative.
5776
5777    If CODING->src_object is a string, CODING->src_pos in an index to
5778    that string.
5779
5780    If CODING->src_object is nil, CODING->source must already point to
5781    the non-relocatable memory area.  In this case, CODING->src_pos is
5782    an offset from CODING->source.
5783
5784    The decoded data is inserted at the current point of the buffer
5785    CODING->dst_object.
5786 */
5787
5788 static int
5789 decode_coding (coding)
5790      struct coding_system *coding;
5791 {
5792   Lisp_Object attrs;
5793
5794   if (BUFFERP (coding->src_object)
5795       && coding->src_pos > 0
5796       && coding->src_pos < GPT
5797       && coding->src_pos + coding->src_chars > GPT)
5798     move_gap_both (coding->src_pos, coding->src_pos_byte);
5799
5800   if (BUFFERP (coding->dst_object))
5801     {
5802       if (current_buffer != XBUFFER (coding->dst_object))
5803         set_buffer_internal (XBUFFER (coding->dst_object));
5804       if (GPT != PT)
5805         move_gap_both (PT, PT_BYTE);
5806     }
5807
5808   coding->consumed = coding->consumed_char = 0;
5809   coding->produced = coding->produced_char = 0;
5810   coding->chars_at_source = 0;
5811   coding->result = CODING_RESULT_SUCCESS;
5812   coding->errors = 0;
5813
5814   ALLOC_CONVERSION_WORK_AREA (coding);
5815
5816   attrs = CODING_ID_ATTRS (coding->id);
5817
5818   do
5819     {
5820       coding_set_source (coding);
5821       coding->annotated = 0;
5822       (*(coding->decoder)) (coding);
5823       if (!NILP (CODING_ATTR_DECODE_TBL (attrs)))
5824         translate_chars (coding, CODING_ATTR_DECODE_TBL (attrs));
5825       else if (!NILP (Vstandard_translation_table_for_decode))
5826         translate_chars (coding, Vstandard_translation_table_for_decode);
5827       coding_set_destination (coding);
5828       produce_chars (coding);
5829       if (coding->annotated)
5830         produce_annotation (coding);
5831     }
5832   while (coding->consumed < coding->src_bytes
5833          && ! coding->result);
5834
5835   if (EQ (CODING_ATTR_TYPE (CODING_ID_ATTRS (coding->id)), Qccl)
5836       && SYMBOLP (CODING_ID_EOL_TYPE (coding->id))
5837       && ! EQ (CODING_ID_EOL_TYPE (coding->id), Qunix))
5838     decode_eol (coding);
5839
5840   coding->carryover_bytes = 0;
5841   if (coding->consumed < coding->src_bytes)
5842     {
5843       int nbytes = coding->src_bytes - coding->consumed;
5844       const unsigned char *src;
5845
5846       coding_set_source (coding);
5847       coding_set_destination (coding);
5848       src = coding->source + coding->consumed;
5849
5850       if (coding->mode & CODING_MODE_LAST_BLOCK)
5851         {
5852           /* Flush out unprocessed data as binary chars.  We are sure
5853              that the number of data is less than the size of
5854              coding->charbuf.  */
5855           while (nbytes-- > 0)
5856             {
5857               int c = *src++;
5858
5859               coding->charbuf[coding->charbuf_used++] = (c & 0x80 ? - c : c);
5860             }
5861           produce_chars (coding);
5862         }
5863       else
5864         {
5865           /* Record unprocessed bytes in coding->carryover.  We are
5866              sure that the number of data is less than the size of
5867              coding->carryover.  */
5868           unsigned char *p = coding->carryover;
5869
5870           coding->carryover_bytes = nbytes;
5871           while (nbytes-- > 0)
5872             *p++ = *src++;
5873         }
5874       coding->consumed = coding->src_bytes;
5875     }
5876
5877   return coding->result;
5878 }
5879
5880
5881 /* Extract an annotation datum from a composition starting at POS and
5882    ending before LIMIT of CODING->src_object (buffer or string), store
5883    the data in BUF, set *STOP to a starting position of the next
5884    composition (if any) or to LIMIT, and return the address of the
5885    next element of BUF.
5886
5887    If such an annotation is not found, set *STOP to a starting
5888    position of a composition after POS (if any) or to LIMIT, and
5889    return BUF.  */
5890
5891 static INLINE int *
5892 handle_composition_annotation (pos, limit, coding, buf, stop)
5893      EMACS_INT pos, limit;
5894      struct coding_system *coding;
5895      int *buf;
5896      EMACS_INT *stop;
5897 {
5898   EMACS_INT start, end;
5899   Lisp_Object prop;
5900
5901   if (! find_composition (pos, limit, &start, &end, &prop, coding->src_object)
5902       || end > limit)
5903     *stop = limit;
5904   else if (start > pos)
5905     *stop = start;
5906   else
5907     {
5908       if (start == pos)
5909         {
5910           /* We found a composition.  Store the corresponding
5911              annotation data in BUF.  */
5912           int *head = buf;
5913           enum composition_method method = COMPOSITION_METHOD (prop);
5914           int nchars = COMPOSITION_LENGTH (prop);
5915
5916           ADD_COMPOSITION_DATA (buf, 0, nchars, method);
5917           if (method != COMPOSITION_RELATIVE)
5918             {
5919               Lisp_Object components;
5920               int len, i, i_byte;
5921
5922               components = COMPOSITION_COMPONENTS (prop);
5923               if (VECTORP (components))
5924                 {
5925                   len = XVECTOR (components)->size;
5926                   for (i = 0; i < len; i++)
5927                     *buf++ = XINT (AREF (components, i));
5928                 }
5929               else if (STRINGP (components))
5930                 {
5931                   len = SCHARS (components);
5932                   i = i_byte = 0;
5933                   while (i < len)
5934                     {
5935                       FETCH_STRING_CHAR_ADVANCE (*buf, components, i, i_byte);
5936                       buf++;
5937                     }
5938                 }
5939               else if (INTEGERP (components))
5940                 {
5941                   len = 1;
5942                   *buf++ = XINT (components);
5943                 }
5944               else if (CONSP (components))
5945                 {
5946                   for (len = 0; CONSP (components);
5947                        len++, components = XCDR (components))
5948                     *buf++ = XINT (XCAR (components));
5949                 }
5950               else
5951                 abort ();
5952               *head -= len;
5953             }
5954         }
5955
5956       if (find_composition (end, limit, &start, &end, &prop,
5957                             coding->src_object)
5958           && end <= limit)
5959         *stop = start;
5960       else
5961         *stop = limit;
5962     }
5963   return buf;
5964 }
5965
5966
5967 /* Extract an annotation datum from a text property `charset' at POS of
5968    CODING->src_object (buffer of string), store the data in BUF, set
5969    *STOP to the position where the value of `charset' property changes
5970    (limiting by LIMIT), and return the address of the next element of
5971    BUF.
5972
5973    If the property value is nil, set *STOP to the position where the
5974    property value is non-nil (limiting by LIMIT), and return BUF.  */
5975
5976 static INLINE int *
5977 handle_charset_annotation (pos, limit, coding, buf, stop)
5978      EMACS_INT pos, limit;
5979      struct coding_system *coding;
5980      int *buf;
5981      EMACS_INT *stop;
5982 {
5983   Lisp_Object val, next;
5984   int id;
5985
5986   val = Fget_text_property (make_number (pos), Qcharset, coding->src_object);
5987   if (! NILP (val) && CHARSETP (val))
5988     id = XINT (CHARSET_SYMBOL_ID (val));
5989   else
5990     id = -1;
5991   ADD_CHARSET_DATA (buf, 0, 0, id);
5992   next = Fnext_single_property_change (make_number (pos), Qcharset,
5993                                        coding->src_object,
5994                                        make_number (limit));
5995   *stop = XINT (next);
5996   return buf;
5997 }
5998
5999
6000 static void
6001 consume_chars (coding)
6002      struct coding_system *coding;
6003 {
6004   int *buf = coding->charbuf;
6005   int *buf_end = coding->charbuf + coding->charbuf_size;
6006   const unsigned char *src = coding->source + coding->consumed;
6007   const unsigned char *src_end = coding->source + coding->src_bytes;
6008   EMACS_INT pos = coding->src_pos + coding->consumed_char;
6009   EMACS_INT end_pos = coding->src_pos + coding->src_chars;
6010   int multibytep = coding->src_multibyte;
6011   Lisp_Object eol_type;
6012   int c;
6013   EMACS_INT stop, stop_composition, stop_charset;
6014
6015   eol_type = CODING_ID_EOL_TYPE (coding->id);
6016   if (VECTORP (eol_type))
6017     eol_type = Qunix;
6018
6019   /* Note: composition handling is not yet implemented.  */
6020   coding->common_flags &= ~CODING_ANNOTATE_COMPOSITION_MASK;
6021
6022   if (coding->common_flags & CODING_ANNOTATE_COMPOSITION_MASK)
6023     stop = stop_composition = pos;
6024   else
6025     stop = stop_composition = end_pos;
6026   if (coding->common_flags & CODING_ANNOTATE_CHARSET_MASK)
6027     stop = stop_charset = pos;
6028   else
6029     stop_charset = end_pos;
6030
6031   /* Compensate for CRLF and annotation.  */
6032   buf_end -= 1 + MAX_ANNOTATION_LENGTH;
6033   while (buf < buf_end)
6034     {
6035       if (pos == stop)
6036         {
6037           if (pos == end_pos)
6038             break;
6039           if (pos == stop_composition)
6040             buf = handle_composition_annotation (pos, end_pos, coding,
6041                                                  buf, &stop_composition);
6042           if (pos == stop_charset)
6043             buf = handle_charset_annotation (pos, end_pos, coding,
6044                                              buf, &stop_charset);
6045           stop = (stop_composition < stop_charset
6046                   ? stop_composition : stop_charset);
6047         }
6048
6049       if (! multibytep)
6050         {
6051           EMACS_INT bytes;
6052
6053           if (! CODING_FOR_UNIBYTE (coding)
6054               && (bytes = MULTIBYTE_LENGTH (src, src_end)) > 0)
6055             c = STRING_CHAR_ADVANCE (src), pos += bytes;
6056           else
6057             c = *src++, pos++;
6058         }
6059       else
6060         c = STRING_CHAR_ADVANCE (src), pos++;
6061       if ((c == '\r') && (coding->mode & CODING_MODE_SELECTIVE_DISPLAY))
6062         c = '\n';
6063       if (! EQ (eol_type, Qunix))
6064         {
6065           if (c == '\n')
6066             {
6067               if (EQ (eol_type, Qdos))
6068                 *buf++ = '\r';
6069               else
6070                 c = '\r';
6071             }
6072         }
6073       *buf++ = c;
6074     }
6075
6076   coding->consumed = src - coding->source;
6077   coding->consumed_char = pos - coding->src_pos;
6078   coding->charbuf_used = buf - coding->charbuf;
6079   coding->chars_at_source = 0;
6080 }
6081
6082
6083 /* Encode the text at CODING->src_object into CODING->dst_object.
6084    CODING->src_object is a buffer or a string.
6085    CODING->dst_object is a buffer or nil.
6086
6087    If CODING->src_object is a buffer, it must be the current buffer.
6088    In this case, if CODING->src_pos is positive, it is a position of
6089    the source text in the buffer, otherwise. the source text is in the
6090    gap area of the buffer, and coding->src_pos specifies the offset of
6091    the text from GPT (which must be the same as PT).  If this is the
6092    same buffer as CODING->dst_object, CODING->src_pos must be
6093    negative and CODING should not have `pre-write-conversion'.
6094
6095    If CODING->src_object is a string, CODING should not have
6096    `pre-write-conversion'.
6097
6098    If CODING->dst_object is a buffer, the encoded data is inserted at
6099    the current point of that buffer.
6100
6101    If CODING->dst_object is nil, the encoded data is placed at the
6102    memory area specified by CODING->destination.  */
6103
6104 static int
6105 encode_coding (coding)
6106      struct coding_system *coding;
6107 {
6108   Lisp_Object attrs;
6109
6110   attrs = CODING_ID_ATTRS (coding->id);
6111
6112   if (BUFFERP (coding->dst_object))
6113     {
6114       set_buffer_internal (XBUFFER (coding->dst_object));
6115       coding->dst_multibyte
6116         = ! NILP (current_buffer->enable_multibyte_characters);
6117     }
6118
6119   coding->consumed = coding->consumed_char = 0;
6120   coding->produced = coding->produced_char = 0;
6121   coding->result = CODING_RESULT_SUCCESS;
6122   coding->errors = 0;
6123
6124   ALLOC_CONVERSION_WORK_AREA (coding);
6125
6126   do {
6127     coding_set_source (coding);
6128     consume_chars (coding);
6129
6130     if (!NILP (CODING_ATTR_ENCODE_TBL (attrs)))
6131       translate_chars (coding, CODING_ATTR_ENCODE_TBL (attrs));
6132     else if (!NILP (Vstandard_translation_table_for_encode))
6133       translate_chars (coding, Vstandard_translation_table_for_encode);
6134
6135     coding_set_destination (coding);
6136     (*(coding->encoder)) (coding);
6137   } while (coding->consumed_char < coding->src_chars);
6138
6139   if (BUFFERP (coding->dst_object))
6140     insert_from_gap (coding->produced_char, coding->produced);
6141
6142   return (coding->result);
6143 }
6144
6145
6146 /* Stack of working buffers used in code conversion.  An nil element
6147    means that the code conversion of that level is not using a working
6148    buffer.  */
6149 Lisp_Object Vcode_conversion_work_buf_list;
6150
6151 /* A working buffer used by the top level conversion.  */
6152 Lisp_Object Vcode_conversion_reused_work_buf;
6153
6154
6155 /* Return a working buffer that can be freely used by the following
6156    code conversion.  MULTIBYTEP specifies the multibyteness of the
6157    buffer.  */
6158
6159 Lisp_Object
6160 make_conversion_work_buffer (multibytep, depth)
6161      int multibytep, depth;
6162 {
6163   struct buffer *current = current_buffer;
6164   Lisp_Object buf, name;
6165
6166   if (depth == 0)
6167     {
6168       if (NILP (Vcode_conversion_reused_work_buf))
6169         Vcode_conversion_reused_work_buf
6170           = Fget_buffer_create (build_string (" *code-converting-work<0>*"));
6171       buf = Vcode_conversion_reused_work_buf;
6172     }
6173   else
6174     {
6175       if (depth < 0)
6176         {
6177           name = build_string (" *code-converting-work*");
6178           name = Fgenerate_new_buffer_name (name, Qnil);
6179         }
6180       else
6181         {
6182           char str[128];
6183
6184           sprintf (str, " *code-converting-work*<%d>", depth);
6185           name = build_string (str);
6186         }
6187       buf = Fget_buffer_create (name);
6188     }
6189   set_buffer_internal (XBUFFER (buf));
6190   current_buffer->undo_list = Qt;
6191   Ferase_buffer ();
6192   Fset_buffer_multibyte (multibytep ? Qt : Qnil);
6193   set_buffer_internal (current);
6194   return buf;
6195 }
6196
6197 static Lisp_Object
6198 code_conversion_restore (buffer)
6199      Lisp_Object buffer;
6200 {
6201   Lisp_Object workbuf;
6202
6203   workbuf = XCAR (Vcode_conversion_work_buf_list);
6204   if (! NILP (workbuf)
6205       && ! EQ (workbuf, Vcode_conversion_reused_work_buf)
6206       && ! NILP (Fbuffer_live_p (workbuf)))
6207     Fkill_buffer (workbuf);
6208   Vcode_conversion_work_buf_list = XCDR (Vcode_conversion_work_buf_list);
6209   set_buffer_internal (XBUFFER (buffer));
6210   return Qnil;
6211 }
6212
6213 static Lisp_Object
6214 code_conversion_save (buffer, with_work_buf, multibyte)
6215      Lisp_Object buffer;
6216      int with_work_buf, multibyte;
6217 {
6218   Lisp_Object workbuf;
6219
6220   if (with_work_buf)
6221     {
6222       int depth = XINT (Flength (Vcode_conversion_work_buf_list));
6223
6224       workbuf = make_conversion_work_buffer (multibyte, depth);
6225     }
6226   else
6227     workbuf = Qnil;
6228   Vcode_conversion_work_buf_list
6229     = Fcons (workbuf, Vcode_conversion_work_buf_list);
6230   record_unwind_protect (code_conversion_restore, buffer);
6231   return workbuf;
6232 }
6233
6234 int
6235 decode_coding_gap (coding, chars, bytes)
6236      struct coding_system *coding;
6237      EMACS_INT chars, bytes;
6238 {
6239   int count = specpdl_ptr - specpdl;
6240   Lisp_Object attrs;
6241   Lisp_Object buffer;
6242
6243   buffer = Fcurrent_buffer ();
6244   code_conversion_save (buffer, 0, 0);
6245
6246   coding->src_object = buffer;
6247   coding->src_chars = chars;
6248   coding->src_bytes = bytes;
6249   coding->src_pos = -chars;
6250   coding->src_pos_byte = -bytes;
6251   coding->src_multibyte = chars < bytes;
6252   coding->dst_object = buffer;
6253   coding->dst_pos = PT;
6254   coding->dst_pos_byte = PT_BYTE;
6255   coding->dst_multibyte = ! NILP (current_buffer->enable_multibyte_characters);
6256   coding->mode |= CODING_MODE_LAST_BLOCK;
6257
6258   if (CODING_REQUIRE_DETECTION (coding))
6259     detect_coding (coding);
6260
6261   decode_coding (coding);
6262
6263   attrs = CODING_ID_ATTRS (coding->id);
6264   if (! NILP (CODING_ATTR_POST_READ (attrs)))
6265     {
6266       EMACS_INT prev_Z = Z, prev_Z_BYTE = Z_BYTE;
6267       Lisp_Object val;
6268
6269       TEMP_SET_PT_BOTH (coding->dst_pos, coding->dst_pos_byte);
6270       val = call1 (CODING_ATTR_POST_READ (attrs),
6271                    make_number (coding->produced_char));
6272       CHECK_NATNUM (val);
6273       coding->produced_char += Z - prev_Z;
6274       coding->produced += Z_BYTE - prev_Z_BYTE;
6275     }
6276
6277   unbind_to (count, Qnil);
6278   return coding->result;
6279 }
6280
6281 int
6282 encode_coding_gap (coding, chars, bytes)
6283      struct coding_system *coding;
6284      EMACS_INT chars, bytes;
6285 {
6286   int count = specpdl_ptr - specpdl;
6287   Lisp_Object buffer;
6288
6289   buffer = Fcurrent_buffer ();
6290   code_conversion_save (buffer, 0, 0);
6291
6292   coding->src_object = buffer;
6293   coding->src_chars = chars;
6294   coding->src_bytes = bytes;
6295   coding->src_pos = -chars;
6296   coding->src_pos_byte = -bytes;
6297   coding->src_multibyte = chars < bytes;
6298   coding->dst_object = coding->src_object;
6299   coding->dst_pos = PT;
6300   coding->dst_pos_byte = PT_BYTE;
6301
6302   encode_coding (coding);
6303
6304   unbind_to (count, Qnil);
6305   return coding->result;
6306 }
6307
6308
6309 /* Decode the text in the range FROM/FROM_BYTE and TO/TO_BYTE in
6310    SRC_OBJECT into DST_OBJECT by coding context CODING.
6311
6312    SRC_OBJECT is a buffer, a string, or Qnil.
6313
6314    If it is a buffer, the text is at point of the buffer.  FROM and TO
6315    are positions in the buffer.
6316
6317    If it is a string, the text is at the beginning of the string.
6318    FROM and TO are indices to the string.
6319
6320    If it is nil, the text is at coding->source.  FROM and TO are
6321    indices to coding->source.
6322
6323    DST_OBJECT is a buffer, Qt, or Qnil.
6324
6325    If it is a buffer, the decoded text is inserted at point of the
6326    buffer.  If the buffer is the same as SRC_OBJECT, the source text
6327    is deleted.
6328
6329    If it is Qt, a string is made from the decoded text, and
6330    set in CODING->dst_object.
6331
6332    If it is Qnil, the decoded text is stored at CODING->destination.
6333    The caller must allocate CODING->dst_bytes bytes at
6334    CODING->destination by xmalloc.  If the decoded text is longer than
6335    CODING->dst_bytes, CODING->destination is relocated by xrealloc.
6336  */
6337
6338 void
6339 decode_coding_object (coding, src_object, from, from_byte, to, to_byte,
6340                       dst_object)
6341      struct coding_system *coding;
6342      Lisp_Object src_object;
6343      EMACS_INT from, from_byte, to, to_byte;
6344      Lisp_Object dst_object;
6345 {
6346   int count = specpdl_ptr - specpdl;
6347   unsigned char *destination;
6348   EMACS_INT dst_bytes;
6349   EMACS_INT chars = to - from;
6350   EMACS_INT bytes = to_byte - from_byte;
6351   Lisp_Object attrs;
6352   Lisp_Object buffer;
6353   int saved_pt = -1, saved_pt_byte;
6354
6355   buffer = Fcurrent_buffer ();
6356
6357   if (NILP (dst_object))
6358     {
6359       destination = coding->destination;
6360       dst_bytes = coding->dst_bytes;
6361     }
6362
6363   coding->src_object = src_object;
6364   coding->src_chars = chars;
6365   coding->src_bytes = bytes;
6366   coding->src_multibyte = chars < bytes;
6367
6368   if (STRINGP (src_object))
6369     {
6370       coding->src_pos = from;
6371       coding->src_pos_byte = from_byte;
6372     }
6373   else if (BUFFERP (src_object))
6374     {
6375       set_buffer_internal (XBUFFER (src_object));
6376       if (from != GPT)
6377         move_gap_both (from, from_byte);
6378       if (EQ (src_object, dst_object))
6379         {
6380           saved_pt = PT, saved_pt_byte = PT_BYTE;
6381           TEMP_SET_PT_BOTH (from, from_byte);
6382           del_range_both (from, from_byte, to, to_byte, 1);
6383           coding->src_pos = -chars;
6384           coding->src_pos_byte = -bytes;
6385         }
6386       else
6387         {
6388           coding->src_pos = from;
6389           coding->src_pos_byte = from_byte;
6390         }
6391     }
6392
6393   if (CODING_REQUIRE_DETECTION (coding))
6394     detect_coding (coding);
6395   attrs = CODING_ID_ATTRS (coding->id);
6396
6397   if (EQ (dst_object, Qt)
6398       || (! NILP (CODING_ATTR_POST_READ (attrs))
6399           && NILP (dst_object)))
6400     {
6401       coding->dst_object = code_conversion_save (buffer, 1, 1);
6402       coding->dst_pos = BEG;
6403       coding->dst_pos_byte = BEG_BYTE;
6404       coding->dst_multibyte = 1;
6405     }
6406   else if (BUFFERP (dst_object))
6407     {
6408       code_conversion_save (buffer, 0, 0);
6409       coding->dst_object = dst_object;
6410       coding->dst_pos = BUF_PT (XBUFFER (dst_object));
6411       coding->dst_pos_byte = BUF_PT_BYTE (XBUFFER (dst_object));
6412       coding->dst_multibyte
6413         = ! NILP (XBUFFER (dst_object)->enable_multibyte_characters);
6414     }
6415   else
6416     {
6417       code_conversion_save (buffer, 0, 0);
6418       coding->dst_object = Qnil;
6419       coding->dst_multibyte = 1;
6420     }
6421
6422   decode_coding (coding);
6423
6424   if (BUFFERP (coding->dst_object))
6425     set_buffer_internal (XBUFFER (coding->dst_object));
6426
6427   if (! NILP (CODING_ATTR_POST_READ (attrs)))
6428     {
6429       struct gcpro gcpro1, gcpro2;
6430       EMACS_INT prev_Z = Z, prev_Z_BYTE = Z_BYTE;
6431       Lisp_Object val;
6432
6433       TEMP_SET_PT_BOTH (coding->dst_pos, coding->dst_pos_byte);
6434       GCPRO2 (coding->src_object, coding->dst_object);
6435       val = call1 (CODING_ATTR_POST_READ (attrs),
6436                    make_number (coding->produced_char));
6437       UNGCPRO;
6438       CHECK_NATNUM (val);
6439       coding->produced_char += Z - prev_Z;
6440       coding->produced += Z_BYTE - prev_Z_BYTE;
6441     }
6442
6443   if (EQ (dst_object, Qt))
6444     {
6445       coding->dst_object = Fbuffer_string ();
6446     }
6447   else if (NILP (dst_object) && BUFFERP (coding->dst_object))
6448     {
6449       set_buffer_internal (XBUFFER (coding->dst_object));
6450       if (dst_bytes < coding->produced)
6451         {
6452           destination
6453             = (unsigned char *) xrealloc (destination, coding->produced);
6454           if (! destination)
6455             {
6456               coding->result = CODING_RESULT_INSUFFICIENT_DST;
6457               unbind_to (count, Qnil);
6458               return;
6459             }
6460           if (BEGV < GPT && GPT < BEGV + coding->produced_char)
6461             move_gap_both (BEGV, BEGV_BYTE);
6462           bcopy (BEGV_ADDR, destination, coding->produced);
6463           coding->destination = destination;
6464         }
6465     }
6466
6467   if (saved_pt >= 0)
6468     {
6469       /* This is the case of:
6470          (BUFFERP (src_object) && EQ (src_object, dst_object))
6471          As we have moved PT while replacing the original buffer
6472          contents, we must recover it now.  */
6473       set_buffer_internal (XBUFFER (src_object));
6474       if (saved_pt < from)
6475         TEMP_SET_PT_BOTH (saved_pt, saved_pt_byte);
6476       else if (saved_pt < from + chars)
6477         TEMP_SET_PT_BOTH (from, from_byte);
6478       else if (! NILP (current_buffer->enable_multibyte_characters))
6479         TEMP_SET_PT_BOTH (saved_pt + (coding->produced_char - chars),
6480                           saved_pt_byte + (coding->produced - bytes));
6481       else
6482         TEMP_SET_PT_BOTH (saved_pt + (coding->produced - bytes),
6483                           saved_pt_byte + (coding->produced - bytes));
6484     }
6485
6486   unbind_to (count, Qnil);
6487 }
6488
6489
6490 void
6491 encode_coding_object (coding, src_object, from, from_byte, to, to_byte,
6492                       dst_object)
6493      struct coding_system *coding;
6494      Lisp_Object src_object;
6495      EMACS_INT from, from_byte, to, to_byte;
6496      Lisp_Object dst_object;
6497 {
6498   int count = specpdl_ptr - specpdl;
6499   EMACS_INT chars = to - from;
6500   EMACS_INT bytes = to_byte - from_byte;
6501   Lisp_Object attrs;
6502   Lisp_Object buffer;
6503   int saved_pt = -1, saved_pt_byte;
6504
6505   buffer = Fcurrent_buffer ();
6506
6507   coding->src_object = src_object;
6508   coding->src_chars = chars;
6509   coding->src_bytes = bytes;
6510   coding->src_multibyte = chars < bytes;
6511
6512   attrs = CODING_ID_ATTRS (coding->id);
6513
6514   if (! NILP (CODING_ATTR_PRE_WRITE (attrs)))
6515     {
6516       coding->src_object = code_conversion_save (buffer, 1,
6517                                                  coding->src_multibyte);
6518       set_buffer_internal (XBUFFER (coding->src_object));
6519       if (STRINGP (src_object))
6520         insert_from_string (src_object, from, from_byte, chars, bytes, 0);
6521       else if (BUFFERP (src_object))
6522         insert_from_buffer (XBUFFER (src_object), from, chars, 0);
6523       else
6524         insert_1_both (coding->source + from, chars, bytes, 0, 0, 0);
6525
6526       if (EQ (src_object, dst_object))
6527         {
6528           set_buffer_internal (XBUFFER (src_object));
6529           saved_pt = PT, saved_pt_byte = PT_BYTE;
6530           del_range_both (from, from_byte, to, to_byte, 1);
6531           set_buffer_internal (XBUFFER (coding->src_object));
6532         }
6533
6534       call2 (CODING_ATTR_PRE_WRITE (attrs),
6535              make_number (BEG), make_number (Z));
6536       coding->src_object = Fcurrent_buffer ();
6537       if (BEG != GPT)
6538         move_gap_both (BEG, BEG_BYTE);
6539       coding->src_chars = Z - BEG;
6540       coding->src_bytes = Z_BYTE - BEG_BYTE;
6541       coding->src_pos = BEG;
6542       coding->src_pos_byte = BEG_BYTE;
6543       coding->src_multibyte = Z < Z_BYTE;
6544     }
6545   else if (STRINGP (src_object))
6546     {
6547       code_conversion_save (buffer, 0, 0);
6548       coding->src_pos = from;
6549       coding->src_pos_byte = from_byte;
6550     }
6551   else if (BUFFERP (src_object))
6552     {
6553       code_conversion_save (buffer, 0, 0);
6554       set_buffer_internal (XBUFFER (src_object));
6555       if (EQ (src_object, dst_object))
6556         {
6557           saved_pt = PT, saved_pt_byte = PT_BYTE;
6558           coding->src_object = del_range_1 (from, to, 1, 1);
6559           coding->src_pos = 0;
6560           coding->src_pos_byte = 0;
6561         }
6562       else
6563         {
6564           if (from < GPT && to >= GPT)
6565             move_gap_both (from, from_byte);
6566           coding->src_pos = from;
6567           coding->src_pos_byte = from_byte;
6568         }
6569     }
6570   else
6571     code_conversion_save (buffer, 0, 0);
6572
6573   if (BUFFERP (dst_object))
6574     {
6575       coding->dst_object = dst_object;
6576       if (EQ (src_object, dst_object))
6577         {
6578           coding->dst_pos = from;
6579           coding->dst_pos_byte = from_byte;
6580         }
6581       else
6582         {
6583           coding->dst_pos = BUF_PT (XBUFFER (dst_object));
6584           coding->dst_pos_byte = BUF_PT_BYTE (XBUFFER (dst_object));
6585         }
6586       coding->dst_multibyte
6587         = ! NILP (XBUFFER (dst_object)->enable_multibyte_characters);
6588     }
6589   else if (EQ (dst_object, Qt))
6590     {
6591       coding->dst_object = Qnil;
6592       coding->dst_bytes = coding->src_chars;
6593       if (coding->dst_bytes == 0)
6594         coding->dst_bytes = 1;
6595       coding->destination = (unsigned char *) xmalloc (coding->dst_bytes);
6596       coding->dst_multibyte = 0;
6597     }
6598   else
6599     {
6600       coding->dst_object = Qnil;
6601       coding->dst_multibyte = 0;
6602     }
6603
6604   encode_coding (coding);
6605
6606   if (EQ (dst_object, Qt))
6607     {
6608       if (BUFFERP (coding->dst_object))
6609         coding->dst_object = Fbuffer_string ();
6610       else
6611         {
6612           coding->dst_object
6613             = make_unibyte_string ((char *) coding->destination,
6614                                    coding->produced);
6615           xfree (coding->destination);
6616         }
6617     }
6618
6619   if (saved_pt >= 0)
6620     {
6621       /* This is the case of:
6622          (BUFFERP (src_object) && EQ (src_object, dst_object))
6623          As we have moved PT while replacing the original buffer
6624          contents, we must recover it now.  */
6625       set_buffer_internal (XBUFFER (src_object));
6626       if (saved_pt < from)
6627         TEMP_SET_PT_BOTH (saved_pt, saved_pt_byte);
6628       else if (saved_pt < from + chars)
6629         TEMP_SET_PT_BOTH (from, from_byte);
6630       else if (! NILP (current_buffer->enable_multibyte_characters))
6631         TEMP_SET_PT_BOTH (saved_pt + (coding->produced_char - chars),
6632                           saved_pt_byte + (coding->produced - bytes));
6633       else
6634         TEMP_SET_PT_BOTH (saved_pt + (coding->produced - bytes),
6635                           saved_pt_byte + (coding->produced - bytes));
6636     }
6637
6638   unbind_to (count, Qnil);
6639 }
6640
6641
6642 Lisp_Object
6643 preferred_coding_system ()
6644 {
6645   int id = coding_categories[coding_priorities[0]].id;
6646
6647   return CODING_ID_NAME (id);
6648 }
6649
6650 \f
6651 #ifdef emacs
6652 /*** 8. Emacs Lisp library functions ***/
6653
6654 DEFUN ("coding-system-p", Fcoding_system_p, Scoding_system_p, 1, 1, 0,
6655        doc: /* Return t if OBJECT is nil or a coding-system.
6656 See the documentation of `define-coding-system' for information
6657 about coding-system objects.  */)
6658      (obj)
6659      Lisp_Object obj;
6660 {
6661   return ((NILP (obj) || CODING_SYSTEM_P (obj)) ? Qt : Qnil);
6662 }
6663
6664 DEFUN ("read-non-nil-coding-system", Fread_non_nil_coding_system,
6665        Sread_non_nil_coding_system, 1, 1, 0,
6666        doc: /* Read a coding system from the minibuffer, prompting with string PROMPT.  */)
6667      (prompt)
6668      Lisp_Object prompt;
6669 {
6670   Lisp_Object val;
6671   do
6672     {
6673       val = Fcompleting_read (prompt, Vcoding_system_alist, Qnil,
6674                               Qt, Qnil, Qcoding_system_history, Qnil, Qnil);
6675     }
6676   while (SCHARS (val) == 0);
6677   return (Fintern (val, Qnil));
6678 }
6679
6680 DEFUN ("read-coding-system", Fread_coding_system, Sread_coding_system, 1, 2, 0,
6681        doc: /* Read a coding system from the minibuffer, prompting with string PROMPT.
6682 If the user enters null input, return second argument DEFAULT-CODING-SYSTEM.  */)
6683      (prompt, default_coding_system)
6684      Lisp_Object prompt, default_coding_system;
6685 {
6686   Lisp_Object val;
6687   if (SYMBOLP (default_coding_system))
6688     XSETSTRING (default_coding_system, XPNTR (SYMBOL_NAME (default_coding_system)));
6689   val = Fcompleting_read (prompt, Vcoding_system_alist, Qnil,
6690                           Qt, Qnil, Qcoding_system_history,
6691                           default_coding_system, Qnil);
6692   return (SCHARS (val) == 0 ? Qnil : Fintern (val, Qnil));
6693 }
6694
6695 DEFUN ("check-coding-system", Fcheck_coding_system, Scheck_coding_system,
6696        1, 1, 0,
6697        doc: /* Check validity of CODING-SYSTEM.
6698 If valid, return CODING-SYSTEM, else signal a `coding-system-error' error.  */)
6699   (coding_system)
6700      Lisp_Object coding_system;
6701 {
6702   CHECK_SYMBOL (coding_system);
6703   if (!NILP (Fcoding_system_p (coding_system)))
6704     return coding_system;
6705   while (1)
6706     Fsignal (Qcoding_system_error, Fcons (coding_system, Qnil));
6707 }
6708
6709 \f
6710 /* Detect how the bytes at SRC of length SRC_BYTES are encoded.  If
6711    HIGHEST is nonzero, return the coding system of the highest
6712    priority among the detected coding systems.  Otherwize return a
6713    list of detected coding systems sorted by their priorities.  If
6714    MULTIBYTEP is nonzero, it is assumed that the bytes are in correct
6715    multibyte form but contains only ASCII and eight-bit chars.
6716    Otherwise, the bytes are raw bytes.
6717
6718    CODING-SYSTEM controls the detection as below:
6719
6720    If it is nil, detect both text-format and eol-format.  If the
6721    text-format part of CODING-SYSTEM is already specified
6722    (e.g. `iso-latin-1'), detect only eol-format.  If the eol-format
6723    part of CODING-SYSTEM is already specified (e.g. `undecided-unix'),
6724    detect only text-format.  */
6725
6726 Lisp_Object
6727 detect_coding_system (src, src_bytes, highest, multibytep, coding_system)
6728      const unsigned char *src;
6729      int src_bytes, highest;
6730      int multibytep;
6731      Lisp_Object coding_system;
6732 {
6733   const unsigned char *src_end = src + src_bytes;
6734   Lisp_Object attrs, eol_type;
6735   Lisp_Object val;
6736   struct coding_system coding;
6737   int id;
6738   struct coding_detection_info detect_info;
6739
6740   if (NILP (coding_system))
6741     coding_system = Qundecided;
6742   setup_coding_system (coding_system, &coding);
6743   attrs = CODING_ID_ATTRS (coding.id);
6744   eol_type = CODING_ID_EOL_TYPE (coding.id);
6745   coding_system = CODING_ATTR_BASE_NAME (attrs);
6746
6747   coding.source = src;
6748   coding.src_bytes = src_bytes;
6749   coding.src_multibyte = multibytep;
6750   coding.consumed = 0;
6751   coding.mode |= CODING_MODE_LAST_BLOCK;
6752
6753   detect_info.checked = detect_info.found = detect_info.rejected = 0;
6754
6755   /* At first, detect text-format if necessary.  */
6756   if (XINT (CODING_ATTR_CATEGORY (attrs)) == coding_category_undecided)
6757     {
6758       enum coding_category category;
6759       struct coding_system *this;
6760       int c, i;
6761
6762       for (; src < src_end; src++)
6763         {
6764           c = *src;
6765           if (c & 0x80
6766               || (c < 0x20 && (c == ISO_CODE_ESC
6767                                || c == ISO_CODE_SI
6768                                || c == ISO_CODE_SO)))
6769             break;
6770         }
6771       coding.head_ascii = src - coding.source;
6772
6773       if (src < src_end)
6774         for (i = 0; i < coding_category_raw_text; i++)
6775           {
6776             category = coding_priorities[i];
6777             this = coding_categories + category;
6778
6779             if (this->id < 0)
6780               {
6781                 /* No coding system of this category is defined.  */
6782                 detect_info.rejected |= (1 << category);
6783               }
6784             else if (category >= coding_category_raw_text)
6785               continue;
6786             else if (detect_info.checked & (1 << category))
6787               {
6788                 if (highest
6789                     && (detect_info.found & (1 << category)))
6790                   break;
6791               }
6792             else
6793               {
6794                 if ((*(this->detector)) (&coding, &detect_info)
6795                     && highest
6796                     && (detect_info.found & (1 << category)))
6797                   break;
6798               }
6799           }
6800
6801
6802       if (detect_info.rejected == CATEGORY_MASK_ANY)
6803         {
6804           detect_info.found = CATEGORY_MASK_RAW_TEXT;
6805           id = coding_categories[coding_category_raw_text].id;
6806           val = Fcons (make_number (id), Qnil);
6807         }
6808       else if (! detect_info.rejected && ! detect_info.found)
6809         {
6810           detect_info.found = CATEGORY_MASK_ANY;
6811           id = coding_categories[coding_category_undecided].id;
6812           val = Fcons (make_number (id), Qnil);
6813         }
6814       else if (highest)
6815         {
6816           if (detect_info.found)
6817             {
6818               detect_info.found = 1 << category;
6819               val = Fcons (make_number (this->id), Qnil);
6820             }
6821           else
6822             for (i = 0; i < coding_category_raw_text; i++)
6823               if (! (detect_info.rejected & (1 << coding_priorities[i])))
6824                 {
6825                   detect_info.found = 1 << coding_priorities[i];
6826                   id = coding_categories[coding_priorities[i]].id;
6827                   val = Fcons (make_number (id), Qnil);
6828                   break;
6829                 }
6830         }
6831       else
6832         {
6833           int mask = detect_info.rejected | detect_info.found;
6834           int found = 0;
6835           val = Qnil;
6836
6837           for (i = coding_category_raw_text - 1; i >= 0; i--)
6838             {
6839               category = coding_priorities[i];
6840               if (! (mask & (1 << category)))
6841                 {
6842                   found |= 1 << category;
6843                   id = coding_categories[category].id;
6844                   val = Fcons (make_number (id), val);
6845                 }
6846             }
6847           for (i = coding_category_raw_text - 1; i >= 0; i--)
6848             {
6849               category = coding_priorities[i];
6850               if (detect_info.found & (1 << category))
6851                 {
6852                   id = coding_categories[category].id;
6853                   val = Fcons (make_number (id), val);
6854                 }
6855             }
6856           detect_info.found |= found;
6857         }
6858     }
6859   else
6860     {
6861       detect_info.found = 1 << XINT (CODING_ATTR_CATEGORY (attrs));
6862       val = Fcons (make_number (coding.id), Qnil);
6863     }
6864
6865   /* Then, detect eol-format if necessary.  */
6866   {
6867     int normal_eol = -1, utf_16_be_eol = -1, utf_16_le_eol;
6868     Lisp_Object tail;
6869
6870     if (VECTORP (eol_type))
6871       {
6872         if (detect_info.found & ~CATEGORY_MASK_UTF_16)
6873           normal_eol = detect_eol (coding.source, src_bytes,
6874                                    coding_category_raw_text);
6875         if (detect_info.found & (CATEGORY_MASK_UTF_16_BE
6876                                  | CATEGORY_MASK_UTF_16_BE_NOSIG))
6877           utf_16_be_eol = detect_eol (coding.source, src_bytes,
6878                                       coding_category_utf_16_be);
6879         if (detect_info.found & (CATEGORY_MASK_UTF_16_LE
6880                                  | CATEGORY_MASK_UTF_16_LE_NOSIG))
6881           utf_16_le_eol = detect_eol (coding.source, src_bytes,
6882                                       coding_category_utf_16_le);
6883       }
6884     else
6885       {
6886         if (EQ (eol_type, Qunix))
6887           normal_eol = utf_16_be_eol = utf_16_le_eol = EOL_SEEN_LF;
6888         else if (EQ (eol_type, Qdos))
6889           normal_eol = utf_16_be_eol = utf_16_le_eol = EOL_SEEN_CRLF;
6890         else
6891           normal_eol = utf_16_be_eol = utf_16_le_eol = EOL_SEEN_CR;
6892       }
6893
6894     for (tail = val; CONSP (tail); tail = XCDR (tail))
6895       {
6896         enum coding_category category;
6897         int this_eol;
6898
6899         id = XINT (XCAR (tail));
6900         attrs = CODING_ID_ATTRS (id);
6901         category = XINT (CODING_ATTR_CATEGORY (attrs));
6902         eol_type = CODING_ID_EOL_TYPE (id);
6903         if (VECTORP (eol_type))
6904           {
6905             if (category == coding_category_utf_16_be
6906                 || category == coding_category_utf_16_be_nosig)
6907               this_eol = utf_16_be_eol;
6908             else if (category == coding_category_utf_16_le
6909                      || category == coding_category_utf_16_le_nosig)
6910               this_eol = utf_16_le_eol;
6911             else
6912               this_eol = normal_eol;
6913
6914             if (this_eol == EOL_SEEN_LF)
6915               XSETCAR (tail, AREF (eol_type, 0));
6916             else if (this_eol == EOL_SEEN_CRLF)
6917               XSETCAR (tail, AREF (eol_type, 1));
6918             else if (this_eol == EOL_SEEN_CR)
6919               XSETCAR (tail, AREF (eol_type, 2));
6920             else
6921               XSETCAR (tail, CODING_ID_NAME (id));
6922           }
6923         else
6924           XSETCAR (tail, CODING_ID_NAME (id));
6925       }
6926   }
6927
6928   return (highest ? XCAR (val) : val);
6929 }
6930
6931
6932 DEFUN ("detect-coding-region", Fdetect_coding_region, Sdetect_coding_region,
6933        2, 3, 0,
6934        doc: /* Detect coding system of the text in the region between START and END.
6935 Return a list of possible coding systems ordered by priority.
6936
6937 If only ASCII characters are found, it returns a list of single element
6938 `undecided' or its subsidiary coding system according to a detected
6939 end-of-line format.
6940
6941 If optional argument HIGHEST is non-nil, return the coding system of
6942 highest priority.  */)
6943      (start, end, highest)
6944      Lisp_Object start, end, highest;
6945 {
6946   int from, to;
6947   int from_byte, to_byte;
6948
6949   CHECK_NUMBER_COERCE_MARKER (start);
6950   CHECK_NUMBER_COERCE_MARKER (end);
6951
6952   validate_region (&start, &end);
6953   from = XINT (start), to = XINT (end);
6954   from_byte = CHAR_TO_BYTE (from);
6955   to_byte = CHAR_TO_BYTE (to);
6956
6957   if (from < GPT && to >= GPT)
6958     move_gap_both (to, to_byte);
6959
6960   return detect_coding_system (BYTE_POS_ADDR (from_byte),
6961                                to_byte - from_byte,
6962                                !NILP (highest),
6963                                !NILP (current_buffer
6964                                       ->enable_multibyte_characters),
6965                                Qnil);
6966 }
6967
6968 DEFUN ("detect-coding-string", Fdetect_coding_string, Sdetect_coding_string,
6969        1, 2, 0,
6970        doc: /* Detect coding system of the text in STRING.
6971 Return a list of possible coding systems ordered by priority.
6972
6973 If only ASCII characters are found, it returns a list of single element
6974 `undecided' or its subsidiary coding system according to a detected
6975 end-of-line format.
6976
6977 If optional argument HIGHEST is non-nil, return the coding system of
6978 highest priority.  */)
6979      (string, highest)
6980      Lisp_Object string, highest;
6981 {
6982   CHECK_STRING (string);
6983
6984   return detect_coding_system (SDATA (string), SBYTES (string),
6985                                !NILP (highest), STRING_MULTIBYTE (string),
6986                                Qnil);
6987 }
6988
6989
6990 static INLINE int
6991 char_encodable_p (c, attrs)
6992      int c;
6993      Lisp_Object attrs;
6994 {
6995   Lisp_Object tail;
6996   struct charset *charset;
6997
6998   for (tail = CODING_ATTR_CHARSET_LIST (attrs);
6999        CONSP (tail); tail = XCDR (tail))
7000     {
7001       charset = CHARSET_FROM_ID (XINT (XCAR (tail)));
7002       if (CHAR_CHARSET_P (c, charset))
7003         break;
7004     }
7005   return (! NILP (tail));
7006 }
7007
7008
7009 /* Return a list of coding systems that safely encode the text between
7010    START and END.  If EXCLUDE is non-nil, it is a list of coding
7011    systems not to check.  The returned list doesn't contain any such
7012    coding systems.  In any case, if the text contains only ASCII or is
7013    unibyte, return t.  */
7014
7015 DEFUN ("find-coding-systems-region-internal",
7016        Ffind_coding_systems_region_internal,
7017        Sfind_coding_systems_region_internal, 2, 3, 0,
7018        doc: /* Internal use only.  */)
7019      (start, end, exclude)
7020      Lisp_Object start, end, exclude;
7021 {
7022   Lisp_Object coding_attrs_list, safe_codings;
7023   EMACS_INT start_byte, end_byte;
7024   const unsigned char *p, *pbeg, *pend;
7025   int c;
7026   Lisp_Object tail, elt;
7027
7028   if (STRINGP (start))
7029     {
7030       if (!STRING_MULTIBYTE (start)
7031           || SCHARS (start) == SBYTES (start))
7032         return Qt;
7033       start_byte = 0;
7034       end_byte = SBYTES (start);
7035     }
7036   else
7037     {
7038       CHECK_NUMBER_COERCE_MARKER (start);
7039       CHECK_NUMBER_COERCE_MARKER (end);
7040       if (XINT (start) < BEG || XINT (end) > Z || XINT (start) > XINT (end))
7041         args_out_of_range (start, end);
7042       if (NILP (current_buffer->enable_multibyte_characters))
7043         return Qt;
7044       start_byte = CHAR_TO_BYTE (XINT (start));
7045       end_byte = CHAR_TO_BYTE (XINT (end));
7046       if (XINT (end) - XINT (start) == end_byte - start_byte)
7047         return Qt;
7048
7049       if (XINT (start) < GPT && XINT (end) > GPT)
7050         {
7051           if ((GPT - XINT (start)) < (XINT (end) - GPT))
7052             move_gap_both (XINT (start), start_byte);
7053           else
7054             move_gap_both (XINT (end), end_byte);
7055         }
7056     }
7057
7058   coding_attrs_list = Qnil;
7059   for (tail = Vcoding_system_list; CONSP (tail); tail = XCDR (tail))
7060     if (NILP (exclude)
7061         || NILP (Fmemq (XCAR (tail), exclude)))
7062       {
7063         Lisp_Object attrs;
7064
7065         attrs = AREF (CODING_SYSTEM_SPEC (XCAR (tail)), 0);
7066         if (EQ (XCAR (tail), CODING_ATTR_BASE_NAME (attrs))
7067             && ! EQ (CODING_ATTR_TYPE (attrs), Qundecided))
7068           coding_attrs_list = Fcons (attrs, coding_attrs_list);
7069       }
7070
7071   if (STRINGP (start))
7072     p = pbeg = SDATA (start);
7073   else
7074     p = pbeg = BYTE_POS_ADDR (start_byte);
7075   pend = p + (end_byte - start_byte);
7076
7077   while (p < pend && ASCII_BYTE_P (*p)) p++;
7078   while (p < pend && ASCII_BYTE_P (*(pend - 1))) pend--;
7079
7080   while (p < pend)
7081     {
7082       if (ASCII_BYTE_P (*p))
7083         p++;
7084       else
7085         {
7086           c = STRING_CHAR_ADVANCE (p);
7087
7088           charset_map_loaded = 0;
7089           for (tail = coding_attrs_list; CONSP (tail);)
7090             {
7091               elt = XCAR (tail);
7092               if (NILP (elt))
7093                 tail = XCDR (tail);
7094               else if (char_encodable_p (c, elt))
7095                 tail = XCDR (tail);
7096               else if (CONSP (XCDR (tail)))
7097                 {
7098                   XSETCAR (tail, XCAR (XCDR (tail)));
7099                   XSETCDR (tail, XCDR (XCDR (tail)));
7100                 }
7101               else
7102                 {
7103                   XSETCAR (tail, Qnil);
7104                   tail = XCDR (tail);
7105                 }
7106             }
7107           if (charset_map_loaded)
7108             {
7109               EMACS_INT p_offset = p - pbeg, pend_offset = pend - pbeg;
7110
7111               if (STRINGP (start))
7112                 pbeg = SDATA (start);
7113               else
7114                 pbeg = BYTE_POS_ADDR (start_byte);
7115               p = pbeg + p_offset;
7116               pend = pbeg + pend_offset;
7117             }
7118         }
7119     }
7120
7121   safe_codings = Qnil;
7122   for (tail = coding_attrs_list; CONSP (tail); tail = XCDR (tail))
7123     if (! NILP (XCAR (tail)))
7124       safe_codings = Fcons (CODING_ATTR_BASE_NAME (XCAR (tail)), safe_codings);
7125
7126   return safe_codings;
7127 }
7128
7129
7130 DEFUN ("unencodable-char-position", Funencodable_char_position,
7131        Sunencodable_char_position, 3, 5, 0,
7132        doc: /*
7133 Return position of first un-encodable character in a region.
7134 START and END specfiy the region and CODING-SYSTEM specifies the
7135 encoding to check.  Return nil if CODING-SYSTEM does encode the region.
7136
7137 If optional 4th argument COUNT is non-nil, it specifies at most how
7138 many un-encodable characters to search.  In this case, the value is a
7139 list of positions.
7140
7141 If optional 5th argument STRING is non-nil, it is a string to search
7142 for un-encodable characters.  In that case, START and END are indexes
7143 to the string.  */)
7144      (start, end, coding_system, count, string)
7145      Lisp_Object start, end, coding_system, count, string;
7146 {
7147   int n;
7148   struct coding_system coding;
7149   Lisp_Object attrs, charset_list;
7150   Lisp_Object positions;
7151   int from, to;
7152   const unsigned char *p, *stop, *pend;
7153   int ascii_compatible;
7154
7155   setup_coding_system (Fcheck_coding_system (coding_system), &coding);
7156   attrs = CODING_ID_ATTRS (coding.id);
7157   if (EQ (CODING_ATTR_TYPE (attrs), Qraw_text))
7158     return Qnil;
7159   ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
7160   charset_list = CODING_ATTR_CHARSET_LIST (attrs);
7161
7162   if (NILP (string))
7163     {
7164       validate_region (&start, &end);
7165       from = XINT (start);
7166       to = XINT (end);
7167       if (NILP (current_buffer->enable_multibyte_characters)
7168           || (ascii_compatible
7169               && (to - from) == (CHAR_TO_BYTE (to) - (CHAR_TO_BYTE (from)))))
7170         return Qnil;
7171       p = CHAR_POS_ADDR (from);
7172       pend = CHAR_POS_ADDR (to);
7173       if (from < GPT && to >= GPT)
7174         stop = GPT_ADDR;
7175       else
7176         stop = pend;
7177     }
7178   else
7179     {
7180       CHECK_STRING (string);
7181       CHECK_NATNUM (start);
7182       CHECK_NATNUM (end);
7183       from = XINT (start);
7184       to = XINT (end);
7185       if (from > to
7186           || to > SCHARS (string))
7187         args_out_of_range_3 (string, start, end);
7188       if (! STRING_MULTIBYTE (string))
7189         return Qnil;
7190       p = SDATA (string) + string_char_to_byte (string, from);
7191       stop = pend = SDATA (string) + string_char_to_byte (string, to);
7192       if (ascii_compatible && (to - from) == (pend - p))
7193         return Qnil;
7194     }
7195
7196   if (NILP (count))
7197     n = 1;
7198   else
7199     {
7200       CHECK_NATNUM (count);
7201       n = XINT (count);
7202     }
7203
7204   positions = Qnil;
7205   while (1)
7206     {
7207       int c;
7208
7209       if (ascii_compatible)
7210         while (p < stop && ASCII_BYTE_P (*p))
7211           p++, from++;
7212       if (p >= stop)
7213         {
7214           if (p >= pend)
7215             break;
7216           stop = pend;
7217           p = GAP_END_ADDR;
7218         }
7219
7220       c = STRING_CHAR_ADVANCE (p);
7221       if (! (ASCII_CHAR_P (c) && ascii_compatible)
7222           && ! char_charset (c, charset_list, NULL))
7223         {
7224           positions = Fcons (make_number (from), positions);
7225           n--;
7226           if (n == 0)
7227             break;
7228         }
7229
7230       from++;
7231     }
7232
7233   return (NILP (count) ? Fcar (positions) : Fnreverse (positions));
7234 }
7235
7236
7237 DEFUN ("check-coding-systems-region", Fcheck_coding_systems_region,
7238        Scheck_coding_systems_region, 3, 3, 0,
7239        doc: /* Check if the region is encodable by coding systems.
7240
7241 START and END are buffer positions specifying the region.
7242 CODING-SYSTEM-LIST is a list of coding systems to check.
7243
7244 The value is an alist ((CODING-SYSTEM POS0 POS1 ...) ...), where
7245 CODING-SYSTEM is a member of CODING-SYSTEM-LIst and can't encode the
7246 whole region, POS0, POS1, ... are buffer positions where non-encodable
7247 characters are found.
7248
7249 If all coding systems in CODING-SYSTEM-LIST can encode the region, the
7250 value is nil.
7251
7252 START may be a string.  In that case, check if the string is
7253 encodable, and the value contains indices to the string instead of
7254 buffer positions.  END is ignored.  */)
7255      (start, end, coding_system_list)
7256      Lisp_Object start, end, coding_system_list;
7257 {
7258   Lisp_Object list;
7259   EMACS_INT start_byte, end_byte;
7260   int pos;
7261   const unsigned char *p, *pbeg, *pend;
7262   int c;
7263   Lisp_Object tail, elt;
7264
7265   if (STRINGP (start))
7266     {
7267       if (!STRING_MULTIBYTE (start)
7268           && SCHARS (start) != SBYTES (start))
7269         return Qnil;
7270       start_byte = 0;
7271       end_byte = SBYTES (start);
7272       pos = 0;
7273     }
7274   else
7275     {
7276       CHECK_NUMBER_COERCE_MARKER (start);
7277       CHECK_NUMBER_COERCE_MARKER (end);
7278       if (XINT (start) < BEG || XINT (end) > Z || XINT (start) > XINT (end))
7279         args_out_of_range (start, end);
7280       if (NILP (current_buffer->enable_multibyte_characters))
7281         return Qnil;
7282       start_byte = CHAR_TO_BYTE (XINT (start));
7283       end_byte = CHAR_TO_BYTE (XINT (end));
7284       if (XINT (end) - XINT (start) == end_byte - start_byte)
7285         return Qt;
7286
7287       if (XINT (start) < GPT && XINT (end) > GPT)
7288         {
7289           if ((GPT - XINT (start)) < (XINT (end) - GPT))
7290             move_gap_both (XINT (start), start_byte);
7291           else
7292             move_gap_both (XINT (end), end_byte);
7293         }
7294       pos = XINT (start);
7295     }
7296
7297   list = Qnil;
7298   for (tail = coding_system_list; CONSP (tail); tail = XCDR (tail))
7299     {
7300       elt = XCAR (tail);
7301       list = Fcons (Fcons (elt, Fcons (AREF (CODING_SYSTEM_SPEC (elt), 0),
7302                                        Qnil)),
7303                     list);
7304     }
7305
7306   if (STRINGP (start))
7307     p = pbeg = SDATA (start);
7308   else
7309     p = pbeg = BYTE_POS_ADDR (start_byte);
7310   pend = p + (end_byte - start_byte);
7311
7312   while (p < pend && ASCII_BYTE_P (*p)) p++, pos++;
7313   while (p < pend && ASCII_BYTE_P (*(pend - 1))) pend--;
7314
7315   while (p < pend)
7316     {
7317       if (ASCII_BYTE_P (*p))
7318         p++;
7319       else
7320         {
7321           c = STRING_CHAR_ADVANCE (p);
7322
7323           charset_map_loaded = 0;
7324           for (tail = list; CONSP (tail); tail = XCDR (tail))
7325             {
7326               elt = XCDR (XCAR (tail));
7327               if (! char_encodable_p (c, XCAR (elt)))
7328                 XSETCDR (elt, Fcons (make_number (pos), XCDR (elt)));
7329             }
7330           if (charset_map_loaded)
7331             {
7332               EMACS_INT p_offset = p - pbeg, pend_offset = pend - pbeg;
7333
7334               if (STRINGP (start))
7335                 pbeg = SDATA (start);
7336               else
7337                 pbeg = BYTE_POS_ADDR (start_byte);
7338               p = pbeg + p_offset;
7339               pend = pbeg + pend_offset;
7340             }
7341         }
7342       pos++;
7343     }
7344
7345   tail = list;
7346   list = Qnil;
7347   for (; CONSP (tail); tail = XCDR (tail))
7348     {
7349       elt = XCAR (tail);
7350       if (CONSP (XCDR (XCDR (elt))))
7351         list = Fcons (Fcons (XCAR (elt), Fnreverse (XCDR (XCDR (elt)))),
7352                       list);
7353     }
7354
7355   return list;
7356 }
7357
7358
7359
7360 Lisp_Object
7361 code_convert_region (start, end, coding_system, dst_object, encodep, norecord)
7362      Lisp_Object start, end, coding_system, dst_object;
7363      int encodep, norecord;
7364 {
7365   struct coding_system coding;
7366   EMACS_INT from, from_byte, to, to_byte;
7367   Lisp_Object src_object;
7368
7369   CHECK_NUMBER_COERCE_MARKER (start);
7370   CHECK_NUMBER_COERCE_MARKER (end);
7371   if (NILP (coding_system))
7372     coding_system = Qno_conversion;
7373   else
7374     CHECK_CODING_SYSTEM (coding_system);
7375   src_object = Fcurrent_buffer ();
7376   if (NILP (dst_object))
7377     dst_object = src_object;
7378   else if (! EQ (dst_object, Qt))
7379     CHECK_BUFFER (dst_object);
7380
7381   validate_region (&start, &end);
7382   from = XFASTINT (start);
7383   from_byte = CHAR_TO_BYTE (from);
7384   to = XFASTINT (end);
7385   to_byte = CHAR_TO_BYTE (to);
7386
7387   setup_coding_system (coding_system, &coding);
7388   coding.mode |= CODING_MODE_LAST_BLOCK;
7389
7390   if (encodep)
7391     encode_coding_object (&coding, src_object, from, from_byte, to, to_byte,
7392                           dst_object);
7393   else
7394     decode_coding_object (&coding, src_object, from, from_byte, to, to_byte,
7395                           dst_object);
7396   if (! norecord)
7397     Vlast_coding_system_used = CODING_ID_NAME (coding.id);
7398
7399   if (coding.result != CODING_RESULT_SUCCESS)
7400     error ("Code conversion error: %d", coding.result);
7401
7402   return (BUFFERP (dst_object)
7403           ? make_number (coding.produced_char)
7404           : coding.dst_object);
7405 }
7406
7407
7408 DEFUN ("decode-coding-region", Fdecode_coding_region, Sdecode_coding_region,
7409        3, 4, "r\nzCoding system: ",
7410        doc: /* Decode the current region from the specified coding system.
7411 When called from a program, takes four arguments:
7412         START, END, CODING-SYSTEM, and DESTINATION.
7413 START and END are buffer positions.
7414
7415 Optional 4th arguments DESTINATION specifies where the decoded text goes.
7416 If nil, the region between START and END is replace by the decoded text.
7417 If buffer, the decoded text is inserted in the buffer.
7418 If t, the decoded text is returned.
7419
7420 This function sets `last-coding-system-used' to the precise coding system
7421 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
7422 not fully specified.)
7423 It returns the length of the decoded text.  */)
7424      (start, end, coding_system, destination)
7425      Lisp_Object start, end, coding_system, destination;
7426 {
7427   return code_convert_region (start, end, coding_system, destination, 0, 0);
7428 }
7429
7430 DEFUN ("encode-coding-region", Fencode_coding_region, Sencode_coding_region,
7431        3, 4, "r\nzCoding system: ",
7432        doc: /* Encode the current region by specified coding system.
7433 When called from a program, takes three arguments:
7434 START, END, and CODING-SYSTEM.  START and END are buffer positions.
7435
7436 Optional 4th arguments DESTINATION specifies where the encoded text goes.
7437 If nil, the region between START and END is replace by the encoded text.
7438 If buffer, the encoded text is inserted in the buffer.
7439 If t, the encoded text is returned.
7440
7441 This function sets `last-coding-system-used' to the precise coding system
7442 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
7443 not fully specified.)
7444 It returns the length of the encoded text.  */)
7445   (start, end, coding_system, destination)
7446      Lisp_Object start, end, coding_system, destination;
7447 {
7448   return code_convert_region (start, end, coding_system, destination, 1, 0);
7449 }
7450
7451 Lisp_Object
7452 code_convert_string (string, coding_system, dst_object,
7453                      encodep, nocopy, norecord)
7454      Lisp_Object string, coding_system, dst_object;
7455      int encodep, nocopy, norecord;
7456 {
7457   struct coding_system coding;
7458   EMACS_INT chars, bytes;
7459
7460   CHECK_STRING (string);
7461   if (NILP (coding_system))
7462     {
7463       if (! norecord)
7464         Vlast_coding_system_used = Qno_conversion;
7465       if (NILP (dst_object))
7466         return (nocopy ? Fcopy_sequence (string) : string);
7467     }
7468
7469   if (NILP (coding_system))
7470     coding_system = Qno_conversion;
7471   else
7472     CHECK_CODING_SYSTEM (coding_system);
7473   if (NILP (dst_object))
7474     dst_object = Qt;
7475   else if (! EQ (dst_object, Qt))
7476     CHECK_BUFFER (dst_object);
7477
7478   setup_coding_system (coding_system, &coding);
7479   coding.mode |= CODING_MODE_LAST_BLOCK;
7480   chars = SCHARS (string);
7481   bytes = SBYTES (string);
7482   if (encodep)
7483     encode_coding_object (&coding, string, 0, 0, chars, bytes, dst_object);
7484   else
7485     decode_coding_object (&coding, string, 0, 0, chars, bytes, dst_object);
7486   if (! norecord)
7487     Vlast_coding_system_used = CODING_ID_NAME (coding.id);
7488
7489   if (coding.result != CODING_RESULT_SUCCESS)
7490     error ("Code conversion error: %d", coding.result);
7491
7492   return (BUFFERP (dst_object)
7493           ? make_number (coding.produced_char)
7494           : coding.dst_object);
7495 }
7496
7497
7498 /* Encode or decode STRING according to CODING_SYSTEM.
7499    Do not set Vlast_coding_system_used.
7500
7501    This function is called only from macros DECODE_FILE and
7502    ENCODE_FILE, thus we ignore character composition.  */
7503
7504 Lisp_Object
7505 code_convert_string_norecord (string, coding_system, encodep)
7506      Lisp_Object string, coding_system;
7507      int encodep;
7508 {
7509   return code_convert_string (string, coding_system, Qt, encodep, 0, 1);
7510 }
7511
7512
7513 DEFUN ("decode-coding-string", Fdecode_coding_string, Sdecode_coding_string,
7514        2, 4, 0,
7515        doc: /* Decode STRING which is encoded in CODING-SYSTEM, and return the result.
7516
7517 Optional third arg NOCOPY non-nil means it is OK to return STRING itself
7518 if the decoding operation is trivial.
7519
7520 Optional fourth arg BUFFER non-nil meant that the decoded text is
7521 inserted in BUFFER instead of returned as a string.  In this case,
7522 the return value is BUFFER.
7523
7524 This function sets `last-coding-system-used' to the precise coding system
7525 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
7526 not fully specified.  */)
7527   (string, coding_system, nocopy, buffer)
7528      Lisp_Object string, coding_system, nocopy, buffer;
7529 {
7530   return code_convert_string (string, coding_system, buffer,
7531                               0, ! NILP (nocopy), 0);
7532 }
7533
7534 DEFUN ("encode-coding-string", Fencode_coding_string, Sencode_coding_string,
7535        2, 4, 0,
7536        doc: /* Encode STRING to CODING-SYSTEM, and return the result.
7537
7538 Optional third arg NOCOPY non-nil means it is OK to return STRING
7539 itself if the encoding operation is trivial.
7540
7541 Optional fourth arg BUFFER non-nil meant that the encoded text is
7542 inserted in BUFFER instead of returned as a string.  In this case,
7543 the return value is BUFFER.
7544
7545 This function sets `last-coding-system-used' to the precise coding system
7546 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
7547 not fully specified.)  */)
7548      (string, coding_system, nocopy, buffer)
7549      Lisp_Object string, coding_system, nocopy, buffer;
7550 {
7551   return code_convert_string (string, coding_system, buffer,
7552                               1, ! NILP (nocopy), 1);
7553 }
7554
7555 \f
7556 DEFUN ("decode-sjis-char", Fdecode_sjis_char, Sdecode_sjis_char, 1, 1, 0,
7557        doc: /* Decode a Japanese character which has CODE in shift_jis encoding.
7558 Return the corresponding character.  */)
7559      (code)
7560      Lisp_Object code;
7561 {
7562   Lisp_Object spec, attrs, val;
7563   struct charset *charset_roman, *charset_kanji, *charset_kana, *charset;
7564   int c;
7565
7566   CHECK_NATNUM (code);
7567   c = XFASTINT (code);
7568   CHECK_CODING_SYSTEM_GET_SPEC (Vsjis_coding_system, spec);
7569   attrs = AREF (spec, 0);
7570
7571   if (ASCII_BYTE_P (c)
7572       && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
7573     return code;
7574
7575   val = CODING_ATTR_CHARSET_LIST (attrs);
7576   charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
7577   charset_kana = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
7578   charset_kanji = CHARSET_FROM_ID (XINT (XCAR (val)));
7579
7580   if (c <= 0x7F)
7581     charset = charset_roman;
7582   else if (c >= 0xA0 && c < 0xDF)
7583     {
7584       charset = charset_kana;
7585       c -= 0x80;
7586     }
7587   else
7588     {
7589       int s1 = c >> 8, s2 = c & 0xFF;
7590
7591       if (s1 < 0x81 || (s1 > 0x9F && s1 < 0xE0) || s1 > 0xEF
7592           || s2 < 0x40 || s2 == 0x7F || s2 > 0xFC)
7593         error ("Invalid code: %d", code);
7594       SJIS_TO_JIS (c);
7595       charset = charset_kanji;
7596     }
7597   c = DECODE_CHAR (charset, c);
7598   if (c < 0)
7599     error ("Invalid code: %d", code);
7600   return make_number (c);
7601 }
7602
7603
7604 DEFUN ("encode-sjis-char", Fencode_sjis_char, Sencode_sjis_char, 1, 1, 0,
7605        doc: /* Encode a Japanese character CHAR to shift_jis encoding.
7606 Return the corresponding code in SJIS.  */)
7607      (ch)
7608     Lisp_Object ch;
7609 {
7610   Lisp_Object spec, attrs, charset_list;
7611   int c;
7612   struct charset *charset;
7613   unsigned code;
7614
7615   CHECK_CHARACTER (ch);
7616   c = XFASTINT (ch);
7617   CHECK_CODING_SYSTEM_GET_SPEC (Vsjis_coding_system, spec);
7618   attrs = AREF (spec, 0);
7619
7620   if (ASCII_CHAR_P (c)
7621       && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
7622     return ch;
7623
7624   charset_list = CODING_ATTR_CHARSET_LIST (attrs);
7625   charset = char_charset (c, charset_list, &code);
7626   if (code == CHARSET_INVALID_CODE (charset))
7627     error ("Can't encode by shift_jis encoding: %d", c);
7628   JIS_TO_SJIS (code);
7629
7630   return make_number (code);
7631 }
7632
7633 DEFUN ("decode-big5-char", Fdecode_big5_char, Sdecode_big5_char, 1, 1, 0,
7634        doc: /* Decode a Big5 character which has CODE in BIG5 coding system.
7635 Return the corresponding character.  */)
7636      (code)
7637      Lisp_Object code;
7638 {
7639   Lisp_Object spec, attrs, val;
7640   struct charset *charset_roman, *charset_big5, *charset;
7641   int c;
7642
7643   CHECK_NATNUM (code);
7644   c = XFASTINT (code);
7645   CHECK_CODING_SYSTEM_GET_SPEC (Vbig5_coding_system, spec);
7646   attrs = AREF (spec, 0);
7647
7648   if (ASCII_BYTE_P (c)
7649       && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
7650     return code;
7651
7652   val = CODING_ATTR_CHARSET_LIST (attrs);
7653   charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
7654   charset_big5 = CHARSET_FROM_ID (XINT (XCAR (val)));
7655
7656   if (c <= 0x7F)
7657     charset = charset_roman;
7658   else
7659     {
7660       int b1 = c >> 8, b2 = c & 0x7F;
7661       if (b1 < 0xA1 || b1 > 0xFE
7662           || b2 < 0x40 || (b2 > 0x7E && b2 < 0xA1) || b2 > 0xFE)
7663         error ("Invalid code: %d", code);
7664       charset = charset_big5;
7665     }
7666   c = DECODE_CHAR (charset, (unsigned )c);
7667   if (c < 0)
7668     error ("Invalid code: %d", code);
7669   return make_number (c);
7670 }
7671
7672 DEFUN ("encode-big5-char", Fencode_big5_char, Sencode_big5_char, 1, 1, 0,
7673        doc: /* Encode the Big5 character CHAR to BIG5 coding system.
7674 Return the corresponding character code in Big5.  */)
7675      (ch)
7676      Lisp_Object ch;
7677 {
7678   Lisp_Object spec, attrs, charset_list;
7679   struct charset *charset;
7680   int c;
7681   unsigned code;
7682
7683   CHECK_CHARACTER (ch);
7684   c = XFASTINT (ch);
7685   CHECK_CODING_SYSTEM_GET_SPEC (Vbig5_coding_system, spec);
7686   attrs = AREF (spec, 0);
7687   if (ASCII_CHAR_P (c)
7688       && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
7689     return ch;
7690
7691   charset_list = CODING_ATTR_CHARSET_LIST (attrs);
7692   charset = char_charset (c, charset_list, &code);
7693   if (code == CHARSET_INVALID_CODE (charset))
7694     error ("Can't encode by Big5 encoding: %d", c);
7695
7696   return make_number (code);
7697 }
7698
7699 \f
7700 DEFUN ("set-terminal-coding-system-internal",
7701        Fset_terminal_coding_system_internal,
7702        Sset_terminal_coding_system_internal, 1, 1, 0,
7703        doc: /* Internal use only.  */)
7704      (coding_system)
7705      Lisp_Object coding_system;
7706 {
7707   CHECK_SYMBOL (coding_system);
7708   setup_coding_system (Fcheck_coding_system (coding_system),
7709                         &terminal_coding);
7710
7711   /* We had better not send unsafe characters to terminal.  */
7712   terminal_coding.mode |= CODING_MODE_SAFE_ENCODING;
7713   /* Characer composition should be disabled.  */
7714   terminal_coding.common_flags &= ~CODING_ANNOTATE_COMPOSITION_MASK;
7715   terminal_coding.src_multibyte = 1;
7716   terminal_coding.dst_multibyte = 0;
7717   return Qnil;
7718 }
7719
7720 DEFUN ("set-safe-terminal-coding-system-internal",
7721        Fset_safe_terminal_coding_system_internal,
7722        Sset_safe_terminal_coding_system_internal, 1, 1, 0,
7723        doc: /* Internal use only.  */)
7724      (coding_system)
7725      Lisp_Object coding_system;
7726 {
7727   CHECK_SYMBOL (coding_system);
7728   setup_coding_system (Fcheck_coding_system (coding_system),
7729                        &safe_terminal_coding);
7730   /* Characer composition should be disabled.  */
7731   safe_terminal_coding.common_flags &= ~CODING_ANNOTATE_COMPOSITION_MASK;
7732   safe_terminal_coding.src_multibyte = 1;
7733   safe_terminal_coding.dst_multibyte = 0;
7734   return Qnil;
7735 }
7736
7737 DEFUN ("terminal-coding-system",
7738        Fterminal_coding_system, Sterminal_coding_system, 0, 0, 0,
7739        doc: /* Return coding system specified for terminal output.  */)
7740      ()
7741 {
7742   return CODING_ID_NAME (terminal_coding.id);
7743 }
7744
7745 DEFUN ("set-keyboard-coding-system-internal",
7746        Fset_keyboard_coding_system_internal,
7747        Sset_keyboard_coding_system_internal, 1, 1, 0,
7748        doc: /* Internal use only.  */)
7749      (coding_system)
7750      Lisp_Object coding_system;
7751 {
7752   CHECK_SYMBOL (coding_system);
7753   setup_coding_system (Fcheck_coding_system (coding_system),
7754                        &keyboard_coding);
7755   /* Characer composition should be disabled.  */
7756   keyboard_coding.common_flags &= ~CODING_ANNOTATE_COMPOSITION_MASK;
7757   return Qnil;
7758 }
7759
7760 DEFUN ("keyboard-coding-system",
7761        Fkeyboard_coding_system, Skeyboard_coding_system, 0, 0, 0,
7762        doc: /* Return coding system specified for decoding keyboard input.  */)
7763      ()
7764 {
7765   return CODING_ID_NAME (keyboard_coding.id);
7766 }
7767
7768 \f
7769 DEFUN ("find-operation-coding-system", Ffind_operation_coding_system,
7770        Sfind_operation_coding_system,  1, MANY, 0,
7771        doc: /* Choose a coding system for an operation based on the target name.
7772 The value names a pair of coding systems: (DECODING-SYSTEM . ENCODING-SYSTEM).
7773 DECODING-SYSTEM is the coding system to use for decoding
7774 \(in case OPERATION does decoding), and ENCODING-SYSTEM is the coding system
7775 for encoding (in case OPERATION does encoding).
7776
7777 The first argument OPERATION specifies an I/O primitive:
7778   For file I/O, `insert-file-contents' or `write-region'.
7779   For process I/O, `call-process', `call-process-region', or `start-process'.
7780   For network I/O, `open-network-stream'.
7781
7782 The remaining arguments should be the same arguments that were passed
7783 to the primitive.  Depending on which primitive, one of those arguments
7784 is selected as the TARGET.  For example, if OPERATION does file I/O,
7785 whichever argument specifies the file name is TARGET.
7786
7787 TARGET has a meaning which depends on OPERATION:
7788   For file I/O, TARGET is a file name.
7789   For process I/O, TARGET is a process name.
7790   For network I/O, TARGET is a service name or a port number
7791
7792 This function looks up what specified for TARGET in,
7793 `file-coding-system-alist', `process-coding-system-alist',
7794 or `network-coding-system-alist' depending on OPERATION.
7795 They may specify a coding system, a cons of coding systems,
7796 or a function symbol to call.
7797 In the last case, we call the function with one argument,
7798 which is a list of all the arguments given to this function.
7799
7800 usage: (find-operation-coding-system OPERATION ARGUMENTS ...)  */)
7801      (nargs, args)
7802      int nargs;
7803      Lisp_Object *args;
7804 {
7805   Lisp_Object operation, target_idx, target, val;
7806   register Lisp_Object chain;
7807
7808   if (nargs < 2)
7809     error ("Too few arguments");
7810   operation = args[0];
7811   if (!SYMBOLP (operation)
7812       || !INTEGERP (target_idx = Fget (operation, Qtarget_idx)))
7813     error ("Invalid first arguement");
7814   if (nargs < 1 + XINT (target_idx))
7815     error ("Too few arguments for operation: %s",
7816            SDATA (SYMBOL_NAME (operation)));
7817   target = args[XINT (target_idx) + 1];
7818   if (!(STRINGP (target)
7819         || (EQ (operation, Qopen_network_stream) && INTEGERP (target))))
7820     error ("Invalid %dth argument", XINT (target_idx) + 1);
7821
7822   chain = ((EQ (operation, Qinsert_file_contents)
7823             || EQ (operation, Qwrite_region))
7824            ? Vfile_coding_system_alist
7825            : (EQ (operation, Qopen_network_stream)
7826               ? Vnetwork_coding_system_alist
7827               : Vprocess_coding_system_alist));
7828   if (NILP (chain))
7829     return Qnil;
7830
7831   for (; CONSP (chain); chain = XCDR (chain))
7832     {
7833       Lisp_Object elt;
7834
7835       elt = XCAR (chain);
7836       if (CONSP (elt)
7837           && ((STRINGP (target)
7838                && STRINGP (XCAR (elt))
7839                && fast_string_match (XCAR (elt), target) >= 0)
7840               || (INTEGERP (target) && EQ (target, XCAR (elt)))))
7841         {
7842           val = XCDR (elt);
7843           /* Here, if VAL is both a valid coding system and a valid
7844              function symbol, we return VAL as a coding system.  */
7845           if (CONSP (val))
7846             return val;
7847           if (! SYMBOLP (val))
7848             return Qnil;
7849           if (! NILP (Fcoding_system_p (val)))
7850             return Fcons (val, val);
7851           if (! NILP (Ffboundp (val)))
7852             {
7853               val = call1 (val, Flist (nargs, args));
7854               if (CONSP (val))
7855                 return val;
7856               if (SYMBOLP (val) && ! NILP (Fcoding_system_p (val)))
7857                 return Fcons (val, val);
7858             }
7859           return Qnil;
7860         }
7861     }
7862   return Qnil;
7863 }
7864
7865 DEFUN ("set-coding-system-priority", Fset_coding_system_priority,
7866        Sset_coding_system_priority, 0, MANY, 0,
7867        doc: /* Assign higher priority to the coding systems given as arguments.
7868 If multiple coding systems belongs to the same category,
7869 all but the first one are ignored.
7870
7871 usage: (set-coding-system-priority ...)  */)
7872      (nargs, args)
7873      int nargs;
7874      Lisp_Object *args;
7875 {
7876   int i, j;
7877   int changed[coding_category_max];
7878   enum coding_category priorities[coding_category_max];
7879
7880   bzero (changed, sizeof changed);
7881
7882   for (i = j = 0; i < nargs; i++)
7883     {
7884       enum coding_category category;
7885       Lisp_Object spec, attrs;
7886
7887       CHECK_CODING_SYSTEM_GET_SPEC (args[i], spec);
7888       attrs = AREF (spec, 0);
7889       category = XINT (CODING_ATTR_CATEGORY (attrs));
7890       if (changed[category])
7891         /* Ignore this coding system because a coding system of the
7892            same category already had a higher priority.  */
7893         continue;
7894       changed[category] = 1;
7895       priorities[j++] = category;
7896       if (coding_categories[category].id >= 0
7897           && ! EQ (args[i], CODING_ID_NAME (coding_categories[category].id)))
7898         setup_coding_system (args[i], &coding_categories[category]);
7899       Fset (AREF (Vcoding_category_table, category), args[i]);
7900     }
7901
7902   /* Now we have decided top J priorities.  Reflect the order of the
7903      original priorities to the remaining priorities.  */
7904
7905   for (i = j, j = 0; i < coding_category_max; i++, j++)
7906     {
7907       while (j < coding_category_max
7908              && changed[coding_priorities[j]])
7909         j++;
7910       if (j == coding_category_max)
7911         abort ();
7912       priorities[i] = coding_priorities[j];
7913     }
7914
7915   bcopy (priorities, coding_priorities, sizeof priorities);
7916
7917   /* Update `coding-category-list'.  */
7918   Vcoding_category_list = Qnil;
7919   for (i = coding_category_max - 1; i >= 0; i--)
7920     Vcoding_category_list
7921       = Fcons (AREF (Vcoding_category_table, priorities[i]),
7922                Vcoding_category_list);
7923
7924   return Qnil;
7925 }
7926
7927 DEFUN ("coding-system-priority-list", Fcoding_system_priority_list,
7928        Scoding_system_priority_list, 0, 1, 0,
7929        doc: /* Return a list of coding systems ordered by their priorities.
7930 HIGHESTP non-nil means just return the highest priority one.  */)
7931      (highestp)
7932      Lisp_Object highestp;
7933 {
7934   int i;
7935   Lisp_Object val;
7936
7937   for (i = 0, val = Qnil; i < coding_category_max; i++)
7938     {
7939       enum coding_category category = coding_priorities[i];
7940       int id = coding_categories[category].id;
7941       Lisp_Object attrs;
7942
7943       if (id < 0)
7944         continue;
7945       attrs = CODING_ID_ATTRS (id);
7946       if (! NILP (highestp))
7947         return CODING_ATTR_BASE_NAME (attrs);
7948       val = Fcons (CODING_ATTR_BASE_NAME (attrs), val);
7949     }
7950   return Fnreverse (val);
7951 }
7952
7953 static char *suffixes[] = { "-unix", "-dos", "-mac" };
7954
7955 static Lisp_Object
7956 make_subsidiaries (base)
7957      Lisp_Object base;
7958 {
7959   Lisp_Object subsidiaries;
7960   int base_name_len = SBYTES (SYMBOL_NAME (base));
7961   char *buf = (char *) alloca (base_name_len + 6);
7962   int i;
7963
7964   bcopy (SDATA (SYMBOL_NAME (base)), buf, base_name_len);
7965   subsidiaries = Fmake_vector (make_number (3), Qnil);
7966   for (i = 0; i < 3; i++)
7967     {
7968       bcopy (suffixes[i], buf + base_name_len, strlen (suffixes[i]) + 1);
7969       ASET (subsidiaries, i, intern (buf));
7970     }
7971   return subsidiaries;
7972 }
7973
7974
7975 DEFUN ("define-coding-system-internal", Fdefine_coding_system_internal,
7976        Sdefine_coding_system_internal, coding_arg_max, MANY, 0,
7977        doc: /* For internal use only.
7978 usage: (define-coding-system-internal ...)  */)
7979      (nargs, args)
7980      int nargs;
7981      Lisp_Object *args;
7982 {
7983   Lisp_Object name;
7984   Lisp_Object spec_vec;         /* [ ATTRS ALIASE EOL_TYPE ] */
7985   Lisp_Object attrs;            /* Vector of attributes.  */
7986   Lisp_Object eol_type;
7987   Lisp_Object aliases;
7988   Lisp_Object coding_type, charset_list, safe_charsets;
7989   enum coding_category category;
7990   Lisp_Object tail, val;
7991   int max_charset_id = 0;
7992   int i;
7993
7994   if (nargs < coding_arg_max)
7995     goto short_args;
7996
7997   attrs = Fmake_vector (make_number (coding_attr_last_index), Qnil);
7998
7999   name = args[coding_arg_name];
8000   CHECK_SYMBOL (name);
8001   CODING_ATTR_BASE_NAME (attrs) = name;
8002
8003   val = args[coding_arg_mnemonic];
8004   if (! STRINGP (val))
8005     CHECK_CHARACTER (val);
8006   CODING_ATTR_MNEMONIC (attrs) = val;
8007
8008   coding_type = args[coding_arg_coding_type];
8009   CHECK_SYMBOL (coding_type);
8010   CODING_ATTR_TYPE (attrs) = coding_type;
8011
8012   charset_list = args[coding_arg_charset_list];
8013   if (SYMBOLP (charset_list))
8014     {
8015       if (EQ (charset_list, Qiso_2022))
8016         {
8017           if (! EQ (coding_type, Qiso_2022))
8018             error ("Invalid charset-list");
8019           charset_list = Viso_2022_charset_list;
8020         }
8021       else if (EQ (charset_list, Qemacs_mule))
8022         {
8023           if (! EQ (coding_type, Qemacs_mule))
8024             error ("Invalid charset-list");
8025           charset_list = Vemacs_mule_charset_list;
8026         }
8027       for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
8028         if (max_charset_id < XFASTINT (XCAR (tail)))
8029           max_charset_id = XFASTINT (XCAR (tail));
8030     }
8031   else
8032     {
8033       charset_list = Fcopy_sequence (charset_list);
8034       for (tail = charset_list; !NILP (tail); tail = Fcdr (tail))
8035         {
8036           struct charset *charset;
8037
8038           val = Fcar (tail);
8039           CHECK_CHARSET_GET_CHARSET (val, charset);
8040           if (EQ (coding_type, Qiso_2022)
8041               ? CHARSET_ISO_FINAL (charset) < 0
8042               : EQ (coding_type, Qemacs_mule)
8043               ? CHARSET_EMACS_MULE_ID (charset) < 0
8044               : 0)
8045             error ("Can't handle charset `%s'",
8046                    SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
8047
8048           XSETCAR (tail, make_number (charset->id));
8049           if (max_charset_id < charset->id)
8050             max_charset_id = charset->id;
8051         }
8052     }
8053   CODING_ATTR_CHARSET_LIST (attrs) = charset_list;
8054
8055   safe_charsets = Fmake_string (make_number (max_charset_id + 1),
8056                                 make_number (255));
8057   for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
8058     SSET (safe_charsets, XFASTINT (XCAR (tail)), 0);
8059   CODING_ATTR_SAFE_CHARSETS (attrs) = safe_charsets;
8060
8061   CODING_ATTR_ASCII_COMPAT (attrs) = args[coding_arg_ascii_compatible_p];
8062
8063   val = args[coding_arg_decode_translation_table];
8064   if (! NILP (val))
8065     CHECK_CHAR_TABLE (val);
8066   CODING_ATTR_DECODE_TBL (attrs) = val;
8067
8068   val = args[coding_arg_encode_translation_table];
8069   if (! NILP (val))
8070     CHECK_CHAR_TABLE (val);
8071   CODING_ATTR_ENCODE_TBL (attrs) = val;
8072
8073   val = args[coding_arg_post_read_conversion];
8074   CHECK_SYMBOL (val);
8075   CODING_ATTR_POST_READ (attrs) = val;
8076
8077   val = args[coding_arg_pre_write_conversion];
8078   CHECK_SYMBOL (val);
8079   CODING_ATTR_PRE_WRITE (attrs) = val;
8080
8081   val = args[coding_arg_default_char];
8082   if (NILP (val))
8083     CODING_ATTR_DEFAULT_CHAR (attrs) = make_number (' ');
8084   else
8085     {
8086       CHECK_CHARACTER (val);
8087       CODING_ATTR_DEFAULT_CHAR (attrs) = val;
8088     }
8089
8090   val = args[coding_arg_for_unibyte];
8091   CODING_ATTR_FOR_UNIBYTE (attrs) = NILP (val) ? Qnil : Qt;
8092
8093   val = args[coding_arg_plist];
8094   CHECK_LIST (val);
8095   CODING_ATTR_PLIST (attrs) = val;
8096
8097   if (EQ (coding_type, Qcharset))
8098     {
8099       Lisp_Object list;
8100       /* Generate a lisp vector of 256 elements.  Each element is nil,
8101          integer, or a list of charset IDs.
8102
8103          If Nth element is nil, the byte code N is invalid in this
8104          coding system.
8105
8106          If Nth element is a number NUM, N is the first byte of a
8107          charset whose ID is NUM.
8108
8109          If Nth element is a list of charset IDs, N is the first byte
8110          of one of them.  The list is sorted by dimensions of the
8111          charsets.  A charset of smaller dimension comes firtst.
8112       */
8113       for (list = Qnil, tail = charset_list; CONSP (tail); tail = XCDR (tail))
8114         {
8115           struct charset *charset = CHARSET_FROM_ID (XFASTINT (XCAR (tail)));
8116
8117           if (charset->method == CHARSET_METHOD_SUPERSET)
8118             {
8119               val = CHARSET_SUPERSET (charset);
8120               for (; CONSP (val); val = XCDR (val))
8121                 list = Fcons (XCAR (XCAR (val)), list);
8122             }
8123           else
8124             list = Fcons (XCAR (tail), list);
8125         }
8126
8127       val = Fmake_vector (make_number (256), Qnil);
8128
8129       for (tail = Fnreverse (list); CONSP (tail); tail = XCDR (tail))
8130         {
8131           struct charset *charset = CHARSET_FROM_ID (XFASTINT (XCAR (tail)));
8132           int dim = CHARSET_DIMENSION (charset);
8133           int idx = (dim - 1) * 4;
8134
8135           if (CHARSET_ASCII_COMPATIBLE_P (charset))
8136             CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
8137
8138           for (i = charset->code_space[idx];
8139                i <= charset->code_space[idx + 1]; i++)
8140             {
8141               Lisp_Object tmp, tmp2;
8142               int dim2;
8143
8144               tmp = AREF (val, i);
8145               if (NILP (tmp))
8146                 tmp = XCAR (tail);
8147               else if (NUMBERP (tmp))
8148                 {
8149                   dim2 = CHARSET_DIMENSION (CHARSET_FROM_ID (XFASTINT (tmp)));
8150                   if (dim < dim2)
8151                     tmp = Fcons (XCAR (tail), Fcons (tmp, Qnil));
8152                   else
8153                     tmp = Fcons (tmp, Fcons (XCAR (tail), Qnil));
8154                 }
8155               else
8156                 {
8157                   for (tmp2 = tmp; CONSP (tmp2); tmp2 = XCDR (tmp2))
8158                     {
8159                       dim2 = CHARSET_DIMENSION (CHARSET_FROM_ID (XFASTINT (XCAR (tmp2))));
8160                       if (dim < dim2)
8161                         break;
8162                     }
8163                   if (NILP (tmp2))
8164                     tmp = nconc2 (tmp, Fcons (XCAR (tail), Qnil));
8165                   else
8166                     {
8167                       XSETCDR (tmp2, Fcons (XCAR (tmp2), XCDR (tmp2)));
8168                       XSETCAR (tmp2, XCAR (tail));
8169                     }
8170                 }
8171               ASET (val, i, tmp);
8172             }
8173         }
8174       ASET (attrs, coding_attr_charset_valids, val);
8175       category = coding_category_charset;
8176     }
8177   else if (EQ (coding_type, Qccl))
8178     {
8179       Lisp_Object valids;
8180
8181       if (nargs < coding_arg_ccl_max)
8182         goto short_args;
8183
8184       val = args[coding_arg_ccl_decoder];
8185       CHECK_CCL_PROGRAM (val);
8186       if (VECTORP (val))
8187         val = Fcopy_sequence (val);
8188       ASET (attrs, coding_attr_ccl_decoder, val);
8189
8190       val = args[coding_arg_ccl_encoder];
8191       CHECK_CCL_PROGRAM (val);
8192       if (VECTORP (val))
8193         val = Fcopy_sequence (val);
8194       ASET (attrs, coding_attr_ccl_encoder, val);
8195
8196       val = args[coding_arg_ccl_valids];
8197       valids = Fmake_string (make_number (256), make_number (0));
8198       for (tail = val; !NILP (tail); tail = Fcdr (tail))
8199         {
8200           int from, to;
8201
8202           val = Fcar (tail);
8203           if (INTEGERP (val))
8204             {
8205               from = to = XINT (val);
8206               if (from < 0 || from > 255)
8207                 args_out_of_range_3 (val, make_number (0), make_number (255));
8208             }
8209           else
8210             {
8211               CHECK_CONS (val);
8212               CHECK_NATNUM_CAR (val);
8213               CHECK_NATNUM_CDR (val);
8214               from = XINT (XCAR (val));
8215               if (from > 255)
8216                 args_out_of_range_3 (XCAR (val),
8217                                      make_number (0), make_number (255));
8218               to = XINT (XCDR (val));
8219               if (to < from || to > 255)
8220                 args_out_of_range_3 (XCDR (val),
8221                                      XCAR (val), make_number (255));
8222             }
8223           for (i = from; i <= to; i++)
8224             SSET (valids, i, 1);
8225         }
8226       ASET (attrs, coding_attr_ccl_valids, valids);
8227
8228       category = coding_category_ccl;
8229     }
8230   else if (EQ (coding_type, Qutf_16))
8231     {
8232       Lisp_Object bom, endian;
8233
8234       CODING_ATTR_ASCII_COMPAT (attrs) = Qnil;
8235
8236       if (nargs < coding_arg_utf16_max)
8237         goto short_args;
8238
8239       bom = args[coding_arg_utf16_bom];
8240       if (! NILP (bom) && ! EQ (bom, Qt))
8241         {
8242           CHECK_CONS (bom);
8243           val = XCAR (bom);
8244           CHECK_CODING_SYSTEM (val);
8245           val = XCDR (bom);
8246           CHECK_CODING_SYSTEM (val);
8247         }
8248       ASET (attrs, coding_attr_utf_16_bom, bom);
8249
8250       endian = args[coding_arg_utf16_endian];
8251       CHECK_SYMBOL (endian);
8252       if (NILP (endian))
8253         endian = Qbig;
8254       else if (! EQ (endian, Qbig) && ! EQ (endian, Qlittle))
8255         error ("Invalid endian: %s", SDATA (SYMBOL_NAME (endian)));
8256       ASET (attrs, coding_attr_utf_16_endian, endian);
8257
8258       category = (CONSP (bom)
8259                   ? coding_category_utf_16_auto
8260                   : NILP (bom)
8261                   ? (EQ (endian, Qbig)
8262                      ? coding_category_utf_16_be_nosig
8263                      : coding_category_utf_16_le_nosig)
8264                   : (EQ (endian, Qbig)
8265                      ? coding_category_utf_16_be
8266                      : coding_category_utf_16_le));
8267     }
8268   else if (EQ (coding_type, Qiso_2022))
8269     {
8270       Lisp_Object initial, reg_usage, request, flags;
8271       int i;
8272
8273       if (nargs < coding_arg_iso2022_max)
8274         goto short_args;
8275
8276       initial = Fcopy_sequence (args[coding_arg_iso2022_initial]);
8277       CHECK_VECTOR (initial);
8278       for (i = 0; i < 4; i++)
8279         {
8280           val = Faref (initial, make_number (i));
8281           if (! NILP (val))
8282             {
8283               struct charset *charset;
8284
8285               CHECK_CHARSET_GET_CHARSET (val, charset);
8286               ASET (initial, i, make_number (CHARSET_ID (charset)));
8287               if (i == 0 && CHARSET_ASCII_COMPATIBLE_P (charset))
8288                 CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
8289             }
8290           else
8291             ASET (initial, i, make_number (-1));
8292         }
8293
8294       reg_usage = args[coding_arg_iso2022_reg_usage];
8295       CHECK_CONS (reg_usage);
8296       CHECK_NUMBER_CAR (reg_usage);
8297       CHECK_NUMBER_CDR (reg_usage);
8298
8299       request = Fcopy_sequence (args[coding_arg_iso2022_request]);
8300       for (tail = request; ! NILP (tail); tail = Fcdr (tail))
8301         {
8302           int id;
8303           Lisp_Object tmp;
8304
8305           val = Fcar (tail);
8306           CHECK_CONS (val);
8307           tmp = XCAR (val);
8308           CHECK_CHARSET_GET_ID (tmp, id);
8309           CHECK_NATNUM_CDR (val);
8310           if (XINT (XCDR (val)) >= 4)
8311             error ("Invalid graphic register number: %d", XINT (XCDR (val)));
8312           XSETCAR (val, make_number (id));
8313         }
8314
8315       flags = args[coding_arg_iso2022_flags];
8316       CHECK_NATNUM (flags);
8317       i = XINT (flags);
8318       if (EQ (args[coding_arg_charset_list], Qiso_2022))
8319         flags = make_number (i | CODING_ISO_FLAG_FULL_SUPPORT);
8320
8321       ASET (attrs, coding_attr_iso_initial, initial);
8322       ASET (attrs, coding_attr_iso_usage, reg_usage);
8323       ASET (attrs, coding_attr_iso_request, request);
8324       ASET (attrs, coding_attr_iso_flags, flags);
8325       setup_iso_safe_charsets (attrs);
8326
8327       if (i & CODING_ISO_FLAG_SEVEN_BITS)
8328         category = ((i & (CODING_ISO_FLAG_LOCKING_SHIFT
8329                           | CODING_ISO_FLAG_SINGLE_SHIFT))
8330                     ? coding_category_iso_7_else
8331                     : EQ (args[coding_arg_charset_list], Qiso_2022)
8332                     ? coding_category_iso_7
8333                     : coding_category_iso_7_tight);
8334       else
8335         {
8336           int id = XINT (AREF (initial, 1));
8337
8338           category = (((i & CODING_ISO_FLAG_LOCKING_SHIFT)
8339                        || EQ (args[coding_arg_charset_list], Qiso_2022)
8340                        || id < 0)
8341                       ? coding_category_iso_8_else
8342                       : (CHARSET_DIMENSION (CHARSET_FROM_ID (id)) == 1)
8343                       ? coding_category_iso_8_1
8344                       : coding_category_iso_8_2);
8345         }
8346       if (category != coding_category_iso_8_1
8347           && category != coding_category_iso_8_2)
8348         CODING_ATTR_ASCII_COMPAT (attrs) = Qnil;
8349     }
8350   else if (EQ (coding_type, Qemacs_mule))
8351     {
8352       if (EQ (args[coding_arg_charset_list], Qemacs_mule))
8353         ASET (attrs, coding_attr_emacs_mule_full, Qt);
8354       CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
8355       category = coding_category_emacs_mule;
8356     }
8357   else if (EQ (coding_type, Qshift_jis))
8358     {
8359
8360       struct charset *charset;
8361
8362       if (XINT (Flength (charset_list)) != 3)
8363         error ("There should be just three charsets");
8364
8365       charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
8366       if (CHARSET_DIMENSION (charset) != 1)
8367         error ("Dimension of charset %s is not one",
8368                SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
8369       if (CHARSET_ASCII_COMPATIBLE_P (charset))
8370         CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
8371
8372       charset_list = XCDR (charset_list);
8373       charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
8374       if (CHARSET_DIMENSION (charset) != 1)
8375         error ("Dimension of charset %s is not one",
8376                SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
8377
8378       charset_list = XCDR (charset_list);
8379       charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
8380       if (CHARSET_DIMENSION (charset) != 2)
8381         error ("Dimension of charset %s is not two",
8382                SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
8383
8384       category = coding_category_sjis;
8385       Vsjis_coding_system = name;
8386     }
8387   else if (EQ (coding_type, Qbig5))
8388     {
8389       struct charset *charset;
8390
8391       if (XINT (Flength (charset_list)) != 2)
8392         error ("There should be just two charsets");
8393
8394       charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
8395       if (CHARSET_DIMENSION (charset) != 1)
8396         error ("Dimension of charset %s is not one",
8397                SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
8398       if (CHARSET_ASCII_COMPATIBLE_P (charset))
8399         CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
8400
8401       charset_list = XCDR (charset_list);
8402       charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
8403       if (CHARSET_DIMENSION (charset) != 2)
8404         error ("Dimension of charset %s is not two",
8405                SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
8406
8407       category = coding_category_big5;
8408       Vbig5_coding_system = name;
8409     }
8410   else if (EQ (coding_type, Qraw_text))
8411     {
8412       category = coding_category_raw_text;
8413       CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
8414     }
8415   else if (EQ (coding_type, Qutf_8))
8416     {
8417       category = coding_category_utf_8;
8418       CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
8419     }
8420   else if (EQ (coding_type, Qundecided))
8421     category = coding_category_undecided;
8422   else
8423     error ("Invalid coding system type: %s",
8424            SDATA (SYMBOL_NAME (coding_type)));
8425
8426   CODING_ATTR_CATEGORY (attrs) = make_number (category);
8427   CODING_ATTR_PLIST (attrs)
8428     = Fcons (QCcategory, Fcons (AREF (Vcoding_category_table, category),
8429                                 CODING_ATTR_PLIST (attrs)));
8430
8431   eol_type = args[coding_arg_eol_type];
8432   if (! NILP (eol_type)
8433       && ! EQ (eol_type, Qunix)
8434       && ! EQ (eol_type, Qdos)
8435       && ! EQ (eol_type, Qmac))
8436     error ("Invalid eol-type");
8437
8438   aliases = Fcons (name, Qnil);
8439
8440   if (NILP (eol_type))
8441     {
8442       eol_type = make_subsidiaries (name);
8443       for (i = 0; i < 3; i++)
8444         {
8445           Lisp_Object this_spec, this_name, this_aliases, this_eol_type;
8446
8447           this_name = AREF (eol_type, i);
8448           this_aliases = Fcons (this_name, Qnil);
8449           this_eol_type = (i == 0 ? Qunix : i == 1 ? Qdos : Qmac);
8450           this_spec = Fmake_vector (make_number (3), attrs);
8451           ASET (this_spec, 1, this_aliases);
8452           ASET (this_spec, 2, this_eol_type);
8453           Fputhash (this_name, this_spec, Vcoding_system_hash_table);
8454           Vcoding_system_list = Fcons (this_name, Vcoding_system_list);
8455           Vcoding_system_alist = Fcons (Fcons (Fsymbol_name (this_name), Qnil),
8456                                         Vcoding_system_alist);
8457         }
8458     }
8459
8460   spec_vec = Fmake_vector (make_number (3), attrs);
8461   ASET (spec_vec, 1, aliases);
8462   ASET (spec_vec, 2, eol_type);
8463
8464   Fputhash (name, spec_vec, Vcoding_system_hash_table);
8465   Vcoding_system_list = Fcons (name, Vcoding_system_list);
8466   Vcoding_system_alist = Fcons (Fcons (Fsymbol_name (name), Qnil),
8467                                 Vcoding_system_alist);
8468
8469   {
8470     int id = coding_categories[category].id;
8471
8472     if (id < 0 || EQ (name, CODING_ID_NAME (id)))
8473       setup_coding_system (name, &coding_categories[category]);
8474   }
8475
8476   return Qnil;
8477
8478  short_args:
8479   return Fsignal (Qwrong_number_of_arguments,
8480                   Fcons (intern ("define-coding-system-internal"),
8481                          make_number (nargs)));
8482 }
8483
8484 /* Fixme: should this record the alias relationships for
8485    diagnostics?  Should it update coding-system-list?  */
8486 DEFUN ("define-coding-system-alias", Fdefine_coding_system_alias,
8487        Sdefine_coding_system_alias, 2, 2, 0,
8488        doc: /* Define ALIAS as an alias for CODING-SYSTEM.  */)
8489      (alias, coding_system)
8490      Lisp_Object alias, coding_system;
8491 {
8492   Lisp_Object spec, aliases, eol_type;
8493
8494   CHECK_SYMBOL (alias);
8495   CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
8496   aliases = AREF (spec, 1);
8497   while (!NILP (XCDR (aliases)))
8498     aliases = XCDR (aliases);
8499   XSETCDR (aliases, Fcons (alias, Qnil));
8500
8501   eol_type = AREF (spec, 2);
8502   if (VECTORP (eol_type))
8503     {
8504       Lisp_Object subsidiaries;
8505       int i;
8506
8507       subsidiaries = make_subsidiaries (alias);
8508       for (i = 0; i < 3; i++)
8509         Fdefine_coding_system_alias (AREF (subsidiaries, i),
8510                                      AREF (eol_type, i));
8511
8512       ASET (spec, 2, subsidiaries);
8513     }
8514
8515   Fputhash (alias, spec, Vcoding_system_hash_table);
8516   Vcoding_system_alist = Fcons (Fcons (Fsymbol_name (alias), Qnil),
8517                                 Vcoding_system_alist);
8518
8519   return Qnil;
8520 }
8521
8522 DEFUN ("coding-system-base", Fcoding_system_base, Scoding_system_base,
8523        1, 1, 0,
8524        doc: /* Return the base of CODING-SYSTEM.
8525 Any alias or subsidiary coding system is not a base coding system.  */)
8526   (coding_system)
8527      Lisp_Object coding_system;
8528 {
8529   Lisp_Object spec, attrs;
8530
8531   if (NILP (coding_system))
8532     return (Qno_conversion);
8533   CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
8534   attrs = AREF (spec, 0);
8535   return CODING_ATTR_BASE_NAME (attrs);
8536 }
8537
8538 DEFUN ("coding-system-plist", Fcoding_system_plist, Scoding_system_plist,
8539        1, 1, 0,
8540        doc: "Return the property list of CODING-SYSTEM.")
8541      (coding_system)
8542      Lisp_Object coding_system;
8543 {
8544   Lisp_Object spec, attrs;
8545
8546   if (NILP (coding_system))
8547     coding_system = Qno_conversion;
8548   CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
8549   attrs = AREF (spec, 0);
8550   return CODING_ATTR_PLIST (attrs);
8551 }
8552
8553
8554 DEFUN ("coding-system-aliases", Fcoding_system_aliases, Scoding_system_aliases,
8555        1, 1, 0,
8556        doc: /* Return the list of aliases of CODING-SYSTEM.  */)
8557      (coding_system)
8558      Lisp_Object coding_system;
8559 {
8560   Lisp_Object spec;
8561
8562   if (NILP (coding_system))
8563     coding_system = Qno_conversion;
8564   CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
8565   return AREF (spec, 1);
8566 }
8567
8568 DEFUN ("coding-system-eol-type", Fcoding_system_eol_type,
8569        Scoding_system_eol_type, 1, 1, 0,
8570        doc: /* Return eol-type of CODING-SYSTEM.
8571 An eol-type is integer 0, 1, 2, or a vector of coding systems.
8572
8573 Integer values 0, 1, and 2 indicate a format of end-of-line; LF, CRLF,
8574 and CR respectively.
8575
8576 A vector value indicates that a format of end-of-line should be
8577 detected automatically.  Nth element of the vector is the subsidiary
8578 coding system whose eol-type is N.  */)
8579      (coding_system)
8580      Lisp_Object coding_system;
8581 {
8582   Lisp_Object spec, eol_type;
8583   int n;
8584
8585   if (NILP (coding_system))
8586     coding_system = Qno_conversion;
8587   if (! CODING_SYSTEM_P (coding_system))
8588     return Qnil;
8589   spec = CODING_SYSTEM_SPEC (coding_system);
8590   eol_type = AREF (spec, 2);
8591   if (VECTORP (eol_type))
8592     return Fcopy_sequence (eol_type);
8593   n = EQ (eol_type, Qunix) ? 0 : EQ (eol_type, Qdos) ? 1 : 2;
8594   return make_number (n);
8595 }
8596
8597 #endif /* emacs */
8598
8599 \f
8600 /*** 9. Post-amble ***/
8601
8602 void
8603 init_coding_once ()
8604 {
8605   int i;
8606
8607   for (i = 0; i < coding_category_max; i++)
8608     {
8609       coding_categories[i].id = -1;
8610       coding_priorities[i] = i;
8611     }
8612
8613   /* ISO2022 specific initialize routine.  */
8614   for (i = 0; i < 0x20; i++)
8615     iso_code_class[i] = ISO_control_0;
8616   for (i = 0x21; i < 0x7F; i++)
8617     iso_code_class[i] = ISO_graphic_plane_0;
8618   for (i = 0x80; i < 0xA0; i++)
8619     iso_code_class[i] = ISO_control_1;
8620   for (i = 0xA1; i < 0xFF; i++)
8621     iso_code_class[i] = ISO_graphic_plane_1;
8622   iso_code_class[0x20] = iso_code_class[0x7F] = ISO_0x20_or_0x7F;
8623   iso_code_class[0xA0] = iso_code_class[0xFF] = ISO_0xA0_or_0xFF;
8624   iso_code_class[ISO_CODE_CR] = ISO_carriage_return;
8625   iso_code_class[ISO_CODE_SO] = ISO_shift_out;
8626   iso_code_class[ISO_CODE_SI] = ISO_shift_in;
8627   iso_code_class[ISO_CODE_SS2_7] = ISO_single_shift_2_7;
8628   iso_code_class[ISO_CODE_ESC] = ISO_escape;
8629   iso_code_class[ISO_CODE_SS2] = ISO_single_shift_2;
8630   iso_code_class[ISO_CODE_SS3] = ISO_single_shift_3;
8631   iso_code_class[ISO_CODE_CSI] = ISO_control_sequence_introducer;
8632
8633   for (i = 0; i < 256; i++)
8634     {
8635       emacs_mule_bytes[i] = 1;
8636     }
8637   emacs_mule_bytes[EMACS_MULE_LEADING_CODE_PRIVATE_11] = 3;
8638   emacs_mule_bytes[EMACS_MULE_LEADING_CODE_PRIVATE_12] = 3;
8639   emacs_mule_bytes[EMACS_MULE_LEADING_CODE_PRIVATE_21] = 4;
8640   emacs_mule_bytes[EMACS_MULE_LEADING_CODE_PRIVATE_22] = 4;
8641 }
8642
8643 #ifdef emacs
8644
8645 void
8646 syms_of_coding ()
8647 {
8648   staticpro (&Vcoding_system_hash_table);
8649   {
8650     Lisp_Object args[2];
8651     args[0] = QCtest;
8652     args[1] = Qeq;
8653     Vcoding_system_hash_table = Fmake_hash_table (2, args);
8654   }
8655
8656   staticpro (&Vsjis_coding_system);
8657   Vsjis_coding_system = Qnil;
8658
8659   staticpro (&Vbig5_coding_system);
8660   Vbig5_coding_system = Qnil;
8661
8662   staticpro (&Vcode_conversion_work_buf_list);
8663   Vcode_conversion_work_buf_list = Qnil;
8664
8665   staticpro (&Vcode_conversion_reused_work_buf);
8666   Vcode_conversion_reused_work_buf = Qnil;
8667
8668   DEFSYM (Qcharset, "charset");
8669   DEFSYM (Qtarget_idx, "target-idx");
8670   DEFSYM (Qcoding_system_history, "coding-system-history");
8671   Fset (Qcoding_system_history, Qnil);
8672
8673   /* Target FILENAME is the first argument.  */
8674   Fput (Qinsert_file_contents, Qtarget_idx, make_number (0));
8675   /* Target FILENAME is the third argument.  */
8676   Fput (Qwrite_region, Qtarget_idx, make_number (2));
8677
8678   DEFSYM (Qcall_process, "call-process");
8679   /* Target PROGRAM is the first argument.  */
8680   Fput (Qcall_process, Qtarget_idx, make_number (0));
8681
8682   DEFSYM (Qcall_process_region, "call-process-region");
8683   /* Target PROGRAM is the third argument.  */
8684   Fput (Qcall_process_region, Qtarget_idx, make_number (2));
8685
8686   DEFSYM (Qstart_process, "start-process");
8687   /* Target PROGRAM is the third argument.  */
8688   Fput (Qstart_process, Qtarget_idx, make_number (2));
8689
8690   DEFSYM (Qopen_network_stream, "open-network-stream");
8691   /* Target SERVICE is the fourth argument.  */
8692   Fput (Qopen_network_stream, Qtarget_idx, make_number (3));
8693
8694   DEFSYM (Qcoding_system, "coding-system");
8695   DEFSYM (Qcoding_aliases, "coding-aliases");
8696
8697   DEFSYM (Qeol_type, "eol-type");
8698   DEFSYM (Qunix, "unix");
8699   DEFSYM (Qdos, "dos");
8700
8701   DEFSYM (Qbuffer_file_coding_system, "buffer-file-coding-system");
8702   DEFSYM (Qpost_read_conversion, "post-read-conversion");
8703   DEFSYM (Qpre_write_conversion, "pre-write-conversion");
8704   DEFSYM (Qdefault_char, "default-char");
8705   DEFSYM (Qundecided, "undecided");
8706   DEFSYM (Qno_conversion, "no-conversion");
8707   DEFSYM (Qraw_text, "raw-text");
8708
8709   DEFSYM (Qiso_2022, "iso-2022");
8710
8711   DEFSYM (Qutf_8, "utf-8");
8712   DEFSYM (Qutf_8_emacs, "utf-8-emacs");
8713
8714   DEFSYM (Qutf_16, "utf-16");
8715   DEFSYM (Qbig, "big");
8716   DEFSYM (Qlittle, "little");
8717
8718   DEFSYM (Qshift_jis, "shift-jis");
8719   DEFSYM (Qbig5, "big5");
8720
8721   DEFSYM (Qcoding_system_p, "coding-system-p");
8722
8723   DEFSYM (Qcoding_system_error, "coding-system-error");
8724   Fput (Qcoding_system_error, Qerror_conditions,
8725         Fcons (Qcoding_system_error, Fcons (Qerror, Qnil)));
8726   Fput (Qcoding_system_error, Qerror_message,
8727         build_string ("Invalid coding system"));
8728
8729   /* Intern this now in case it isn't already done.
8730      Setting this variable twice is harmless.
8731      But don't staticpro it here--that is done in alloc.c.  */
8732   Qchar_table_extra_slots = intern ("char-table-extra-slots");
8733
8734   DEFSYM (Qtranslation_table, "translation-table");
8735   Fput (Qtranslation_table, Qchar_table_extra_slots, make_number (1));
8736   DEFSYM (Qtranslation_table_id, "translation-table-id");
8737   DEFSYM (Qtranslation_table_for_decode, "translation-table-for-decode");
8738   DEFSYM (Qtranslation_table_for_encode, "translation-table-for-encode");
8739
8740   DEFSYM (Qvalid_codes, "valid-codes");
8741
8742   DEFSYM (Qemacs_mule, "emacs-mule");
8743
8744   DEFSYM (QCcategory, ":category");
8745
8746   Vcoding_category_table
8747     = Fmake_vector (make_number (coding_category_max), Qnil);
8748   staticpro (&Vcoding_category_table);
8749   /* Followings are target of code detection.  */
8750   ASET (Vcoding_category_table, coding_category_iso_7,
8751         intern ("coding-category-iso-7"));
8752   ASET (Vcoding_category_table, coding_category_iso_7_tight,
8753         intern ("coding-category-iso-7-tight"));
8754   ASET (Vcoding_category_table, coding_category_iso_8_1,
8755         intern ("coding-category-iso-8-1"));
8756   ASET (Vcoding_category_table, coding_category_iso_8_2,
8757         intern ("coding-category-iso-8-2"));
8758   ASET (Vcoding_category_table, coding_category_iso_7_else,
8759         intern ("coding-category-iso-7-else"));
8760   ASET (Vcoding_category_table, coding_category_iso_8_else,
8761         intern ("coding-category-iso-8-else"));
8762   ASET (Vcoding_category_table, coding_category_utf_8,
8763         intern ("coding-category-utf-8"));
8764   ASET (Vcoding_category_table, coding_category_utf_16_be,
8765         intern ("coding-category-utf-16-be"));
8766   ASET (Vcoding_category_table, coding_category_utf_16_auto,
8767         intern ("coding-category-utf-16-auto"));
8768   ASET (Vcoding_category_table, coding_category_utf_16_le,
8769         intern ("coding-category-utf-16-le"));
8770   ASET (Vcoding_category_table, coding_category_utf_16_be_nosig,
8771         intern ("coding-category-utf-16-be-nosig"));
8772   ASET (Vcoding_category_table, coding_category_utf_16_le_nosig,
8773         intern ("coding-category-utf-16-le-nosig"));
8774   ASET (Vcoding_category_table, coding_category_charset,
8775         intern ("coding-category-charset"));
8776   ASET (Vcoding_category_table, coding_category_sjis,
8777         intern ("coding-category-sjis"));
8778   ASET (Vcoding_category_table, coding_category_big5,
8779         intern ("coding-category-big5"));
8780   ASET (Vcoding_category_table, coding_category_ccl,
8781         intern ("coding-category-ccl"));
8782   ASET (Vcoding_category_table, coding_category_emacs_mule,
8783         intern ("coding-category-emacs-mule"));
8784   /* Followings are NOT target of code detection.  */
8785   ASET (Vcoding_category_table, coding_category_raw_text,
8786         intern ("coding-category-raw-text"));
8787   ASET (Vcoding_category_table, coding_category_undecided,
8788         intern ("coding-category-undecided"));
8789
8790   defsubr (&Scoding_system_p);
8791   defsubr (&Sread_coding_system);
8792   defsubr (&Sread_non_nil_coding_system);
8793   defsubr (&Scheck_coding_system);
8794   defsubr (&Sdetect_coding_region);
8795   defsubr (&Sdetect_coding_string);
8796   defsubr (&Sfind_coding_systems_region_internal);
8797   defsubr (&Sunencodable_char_position);
8798   defsubr (&Scheck_coding_systems_region);
8799   defsubr (&Sdecode_coding_region);
8800   defsubr (&Sencode_coding_region);
8801   defsubr (&Sdecode_coding_string);
8802   defsubr (&Sencode_coding_string);
8803   defsubr (&Sdecode_sjis_char);
8804   defsubr (&Sencode_sjis_char);
8805   defsubr (&Sdecode_big5_char);
8806   defsubr (&Sencode_big5_char);
8807   defsubr (&Sset_terminal_coding_system_internal);
8808   defsubr (&Sset_safe_terminal_coding_system_internal);
8809   defsubr (&Sterminal_coding_system);
8810   defsubr (&Sset_keyboard_coding_system_internal);
8811   defsubr (&Skeyboard_coding_system);
8812   defsubr (&Sfind_operation_coding_system);
8813   defsubr (&Sset_coding_system_priority);
8814   defsubr (&Sdefine_coding_system_internal);
8815   defsubr (&Sdefine_coding_system_alias);
8816   defsubr (&Scoding_system_base);
8817   defsubr (&Scoding_system_plist);
8818   defsubr (&Scoding_system_aliases);
8819   defsubr (&Scoding_system_eol_type);
8820   defsubr (&Scoding_system_priority_list);
8821
8822   DEFVAR_LISP ("coding-system-list", &Vcoding_system_list,
8823                doc: /* List of coding systems.
8824
8825 Do not alter the value of this variable manually.  This variable should be
8826 updated by the functions `define-coding-system' and
8827 `define-coding-system-alias'.  */);
8828   Vcoding_system_list = Qnil;
8829
8830   DEFVAR_LISP ("coding-system-alist", &Vcoding_system_alist,
8831                doc: /* Alist of coding system names.
8832 Each element is one element list of coding system name.
8833 This variable is given to `completing-read' as TABLE argument.
8834
8835 Do not alter the value of this variable manually.  This variable should be
8836 updated by the functions `make-coding-system' and
8837 `define-coding-system-alias'.  */);
8838   Vcoding_system_alist = Qnil;
8839
8840   DEFVAR_LISP ("coding-category-list", &Vcoding_category_list,
8841                doc: /* List of coding-categories (symbols) ordered by priority.
8842
8843 On detecting a coding system, Emacs tries code detection algorithms
8844 associated with each coding-category one by one in this order.  When
8845 one algorithm agrees with a byte sequence of source text, the coding
8846 system bound to the corresponding coding-category is selected.  */);
8847   {
8848     int i;
8849
8850     Vcoding_category_list = Qnil;
8851     for (i = coding_category_max - 1; i >= 0; i--)
8852       Vcoding_category_list
8853         = Fcons (XVECTOR (Vcoding_category_table)->contents[i],
8854                  Vcoding_category_list);
8855   }
8856
8857   DEFVAR_LISP ("coding-system-for-read", &Vcoding_system_for_read,
8858                doc: /* Specify the coding system for read operations.
8859 It is useful to bind this variable with `let', but do not set it globally.
8860 If the value is a coding system, it is used for decoding on read operation.
8861 If not, an appropriate element is used from one of the coding system alists:
8862 There are three such tables, `file-coding-system-alist',
8863 `process-coding-system-alist', and `network-coding-system-alist'.  */);
8864   Vcoding_system_for_read = Qnil;
8865
8866   DEFVAR_LISP ("coding-system-for-write", &Vcoding_system_for_write,
8867                doc: /* Specify the coding system for write operations.
8868 Programs bind this variable with `let', but you should not set it globally.
8869 If the value is a coding system, it is used for encoding of output,
8870 when writing it to a file and when sending it to a file or subprocess.
8871
8872 If this does not specify a coding system, an appropriate element
8873 is used from one of the coding system alists:
8874 There are three such tables, `file-coding-system-alist',
8875 `process-coding-system-alist', and `network-coding-system-alist'.
8876 For output to files, if the above procedure does not specify a coding system,
8877 the value of `buffer-file-coding-system' is used.  */);
8878   Vcoding_system_for_write = Qnil;
8879
8880   DEFVAR_LISP ("last-coding-system-used", &Vlast_coding_system_used,
8881                doc: /*
8882 Coding system used in the latest file or process I/O.  */);
8883   Vlast_coding_system_used = Qnil;
8884
8885   DEFVAR_BOOL ("inhibit-eol-conversion", &inhibit_eol_conversion,
8886                doc: /*
8887 *Non-nil means always inhibit code conversion of end-of-line format.
8888 See info node `Coding Systems' and info node `Text and Binary' concerning
8889 such conversion.  */);
8890   inhibit_eol_conversion = 0;
8891
8892   DEFVAR_BOOL ("inherit-process-coding-system", &inherit_process_coding_system,
8893                doc: /*
8894 Non-nil means process buffer inherits coding system of process output.
8895 Bind it to t if the process output is to be treated as if it were a file
8896 read from some filesystem.  */);
8897   inherit_process_coding_system = 0;
8898
8899   DEFVAR_LISP ("file-coding-system-alist", &Vfile_coding_system_alist,
8900                doc: /*
8901 Alist to decide a coding system to use for a file I/O operation.
8902 The format is ((PATTERN . VAL) ...),
8903 where PATTERN is a regular expression matching a file name,
8904 VAL is a coding system, a cons of coding systems, or a function symbol.
8905 If VAL is a coding system, it is used for both decoding and encoding
8906 the file contents.
8907 If VAL is a cons of coding systems, the car part is used for decoding,
8908 and the cdr part is used for encoding.
8909 If VAL is a function symbol, the function must return a coding system
8910 or a cons of coding systems which are used as above.  The function gets
8911 the arguments with which `find-operation-coding-systems' was called.
8912
8913 See also the function `find-operation-coding-system'
8914 and the variable `auto-coding-alist'.  */);
8915   Vfile_coding_system_alist = Qnil;
8916
8917   DEFVAR_LISP ("process-coding-system-alist", &Vprocess_coding_system_alist,
8918                doc: /*
8919 Alist to decide a coding system to use for a process I/O operation.
8920 The format is ((PATTERN . VAL) ...),
8921 where PATTERN is a regular expression matching a program name,
8922 VAL is a coding system, a cons of coding systems, or a function symbol.
8923 If VAL is a coding system, it is used for both decoding what received
8924 from the program and encoding what sent to the program.
8925 If VAL is a cons of coding systems, the car part is used for decoding,
8926 and the cdr part is used for encoding.
8927 If VAL is a function symbol, the function must return a coding system
8928 or a cons of coding systems which are used as above.
8929
8930 See also the function `find-operation-coding-system'.  */);
8931   Vprocess_coding_system_alist = Qnil;
8932
8933   DEFVAR_LISP ("network-coding-system-alist", &Vnetwork_coding_system_alist,
8934                doc: /*
8935 Alist to decide a coding system to use for a network I/O operation.
8936 The format is ((PATTERN . VAL) ...),
8937 where PATTERN is a regular expression matching a network service name
8938 or is a port number to connect to,
8939 VAL is a coding system, a cons of coding systems, or a function symbol.
8940 If VAL is a coding system, it is used for both decoding what received
8941 from the network stream and encoding what sent to the network stream.
8942 If VAL is a cons of coding systems, the car part is used for decoding,
8943 and the cdr part is used for encoding.
8944 If VAL is a function symbol, the function must return a coding system
8945 or a cons of coding systems which are used as above.
8946
8947 See also the function `find-operation-coding-system'.  */);
8948   Vnetwork_coding_system_alist = Qnil;
8949
8950   DEFVAR_LISP ("locale-coding-system", &Vlocale_coding_system,
8951                doc: /* Coding system to use with system messages.
8952 Also used for decoding keyboard input on X Window system.  */);
8953   Vlocale_coding_system = Qnil;
8954
8955   /* The eol mnemonics are reset in startup.el system-dependently.  */
8956   DEFVAR_LISP ("eol-mnemonic-unix", &eol_mnemonic_unix,
8957                doc: /*
8958 *String displayed in mode line for UNIX-like (LF) end-of-line format.  */);
8959   eol_mnemonic_unix = build_string (":");
8960
8961   DEFVAR_LISP ("eol-mnemonic-dos", &eol_mnemonic_dos,
8962                doc: /*
8963 *String displayed in mode line for DOS-like (CRLF) end-of-line format.  */);
8964   eol_mnemonic_dos = build_string ("\\");
8965
8966   DEFVAR_LISP ("eol-mnemonic-mac", &eol_mnemonic_mac,
8967                doc: /*
8968 *String displayed in mode line for MAC-like (CR) end-of-line format.  */);
8969   eol_mnemonic_mac = build_string ("/");
8970
8971   DEFVAR_LISP ("eol-mnemonic-undecided", &eol_mnemonic_undecided,
8972                doc: /*
8973 *String displayed in mode line when end-of-line format is not yet determined.  */);
8974   eol_mnemonic_undecided = build_string (":");
8975
8976   DEFVAR_LISP ("enable-character-translation", &Venable_character_translation,
8977                doc: /*
8978 *Non-nil enables character translation while encoding and decoding.  */);
8979   Venable_character_translation = Qt;
8980
8981   DEFVAR_LISP ("standard-translation-table-for-decode",
8982                &Vstandard_translation_table_for_decode,
8983                doc: /* Table for translating characters while decoding.  */);
8984   Vstandard_translation_table_for_decode = Qnil;
8985
8986   DEFVAR_LISP ("standard-translation-table-for-encode",
8987                &Vstandard_translation_table_for_encode,
8988                doc: /* Table for translating characters while encoding.  */);
8989   Vstandard_translation_table_for_encode = Qnil;
8990
8991   DEFVAR_LISP ("charset-revision-table", &Vcharset_revision_table,
8992                doc: /* Alist of charsets vs revision numbers.
8993 While encoding, if a charset (car part of an element) is found,
8994 designate it with the escape sequence identifying revision (cdr part
8995 of the element).  */);
8996   Vcharset_revision_table = Qnil;
8997
8998   DEFVAR_LISP ("default-process-coding-system",
8999                &Vdefault_process_coding_system,
9000                doc: /* Cons of coding systems used for process I/O by default.
9001 The car part is used for decoding a process output,
9002 the cdr part is used for encoding a text to be sent to a process.  */);
9003   Vdefault_process_coding_system = Qnil;
9004
9005   DEFVAR_LISP ("latin-extra-code-table", &Vlatin_extra_code_table,
9006                doc: /*
9007 Table of extra Latin codes in the range 128..159 (inclusive).
9008 This is a vector of length 256.
9009 If Nth element is non-nil, the existence of code N in a file
9010 \(or output of subprocess) doesn't prevent it to be detected as
9011 a coding system of ISO 2022 variant which has a flag
9012 `accept-latin-extra-code' t (e.g. iso-latin-1) on reading a file
9013 or reading output of a subprocess.
9014 Only 128th through 159th elements has a meaning.  */);
9015   Vlatin_extra_code_table = Fmake_vector (make_number (256), Qnil);
9016
9017   DEFVAR_LISP ("select-safe-coding-system-function",
9018                &Vselect_safe_coding_system_function,
9019                doc: /*
9020 Function to call to select safe coding system for encoding a text.
9021
9022 If set, this function is called to force a user to select a proper
9023 coding system which can encode the text in the case that a default
9024 coding system used in each operation can't encode the text.
9025
9026 The default value is `select-safe-coding-system' (which see).  */);
9027   Vselect_safe_coding_system_function = Qnil;
9028
9029   DEFVAR_BOOL ("coding-system-require-warning",
9030                &coding_system_require_warning,
9031                doc: /* Internal use only.
9032 If non-nil, on writing a file, `select-safe-coding-system-function' is
9033 called even if `coding-system-for-write' is non-nil.  The command
9034 `universal-coding-system-argument' binds this variable to t temporarily.  */);
9035   coding_system_require_warning = 0;
9036
9037
9038   DEFVAR_BOOL ("inhibit-iso-escape-detection",
9039                &inhibit_iso_escape_detection,
9040                doc: /*
9041 If non-nil, Emacs ignores ISO2022's escape sequence on code detection.
9042
9043 By default, on reading a file, Emacs tries to detect how the text is
9044 encoded.  This code detection is sensitive to escape sequences.  If
9045 the sequence is valid as ISO2022, the code is determined as one of
9046 the ISO2022 encodings, and the file is decoded by the corresponding
9047 coding system (e.g. `iso-2022-7bit').
9048
9049 However, there may be a case that you want to read escape sequences in
9050 a file as is.  In such a case, you can set this variable to non-nil.
9051 Then, as the code detection ignores any escape sequences, no file is
9052 detected as encoded in some ISO2022 encoding.  The result is that all
9053 escape sequences become visible in a buffer.
9054
9055 The default value is nil, and it is strongly recommended not to change
9056 it.  That is because many Emacs Lisp source files that contain
9057 non-ASCII characters are encoded by the coding system `iso-2022-7bit'
9058 in Emacs's distribution, and they won't be decoded correctly on
9059 reading if you suppress escape sequence detection.
9060
9061 The other way to read escape sequences in a file without decoding is
9062 to explicitly specify some coding system that doesn't use ISO2022's
9063 escape sequence (e.g `latin-1') on reading by \\[universal-coding-system-argument].  */);
9064   inhibit_iso_escape_detection = 0;
9065
9066   DEFVAR_LISP ("translation-table-for-input", &Vtranslation_table_for_input,
9067                doc: /* Char table for translating self-inserting characters.
9068 This is applied to the result of input methods, not their input.  See also
9069 `keyboard-translate-table'.  */);
9070     Vtranslation_table_for_input = Qnil;
9071
9072   {
9073     Lisp_Object args[coding_arg_max];
9074     Lisp_Object plist[16];
9075     int i;
9076
9077     for (i = 0; i < coding_arg_max; i++)
9078       args[i] = Qnil;
9079
9080     plist[0] = intern (":name");
9081     plist[1] = args[coding_arg_name] = Qno_conversion;
9082     plist[2] = intern (":mnemonic");
9083     plist[3] = args[coding_arg_mnemonic] = make_number ('=');
9084     plist[4] = intern (":coding-type");
9085     plist[5] = args[coding_arg_coding_type] = Qraw_text;
9086     plist[6] = intern (":ascii-compatible-p");
9087     plist[7] = args[coding_arg_ascii_compatible_p] = Qt;
9088     plist[8] = intern (":default-char");
9089     plist[9] = args[coding_arg_default_char] = make_number (0);
9090     plist[10] = intern (":for-unibyte");
9091     plist[11] = args[coding_arg_for_unibyte] = Qt;
9092     plist[12] = intern (":docstring");
9093     plist[13] = build_string ("Do no conversion.\n\
9094 \n\
9095 When you visit a file with this coding, the file is read into a\n\
9096 unibyte buffer as is, thus each byte of a file is treated as a\n\
9097 character.");
9098     plist[14] = intern (":eol-type");
9099     plist[15] = args[coding_arg_eol_type] = Qunix;
9100     args[coding_arg_plist] = Flist (16, plist);
9101     Fdefine_coding_system_internal (coding_arg_max, args);
9102   }
9103
9104   setup_coding_system (Qno_conversion, &keyboard_coding);
9105   setup_coding_system (Qno_conversion, &terminal_coding);
9106   setup_coding_system (Qno_conversion, &safe_terminal_coding);
9107
9108   {
9109     int i;
9110
9111     for (i = 0; i < coding_category_max; i++)
9112       Fset (AREF (Vcoding_category_table, i), Qno_conversion);
9113   }
9114 }
9115
9116 char *
9117 emacs_strerror (error_number)
9118      int error_number;
9119 {
9120   char *str;
9121
9122   synchronize_system_messages_locale ();
9123   str = strerror (error_number);
9124
9125   if (! NILP (Vlocale_coding_system))
9126     {
9127       Lisp_Object dec = code_convert_string_norecord (build_string (str),
9128                                                       Vlocale_coding_system,
9129                                                       0);
9130       str = (char *) SDATA (dec);
9131     }
9132
9133   return str;
9134 }
9135
9136 #endif /* emacs */