src/coding.c

   1 /* Coding system handler (conversion, detection, etc).
   2    Copyright (C) 1995, 1997, 1998 Electrotechnical Laboratory, JAPAN.
   3    Licensed to the Free Software Foundation.
   4    Copyright (C) 2001, 2002 Free Software Foundation, Inc.
   5    Copyright (C) 2001, 2002
   6      National Institute of Advanced Industrial Science and Technology (AIST)
   7      Registration Number H13PRO009
   8
   9 This file is part of GNU Emacs.
  10
  11 GNU Emacs is free software; you can redistribute it and/or modify
  12 it under the terms of the GNU General Public License as published by
  13 the Free Software Foundation; either version 2, or (at your option)
  14 any later version.
  15
  16 GNU Emacs is distributed in the hope that it will be useful,
  17 but WITHOUT ANY WARRANTY; without even the implied warranty of
  18 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  19 GNU General Public License for more details.
  20
  21 You should have received a copy of the GNU General Public License
  22 along with GNU Emacs; see the file COPYING.  If not, write to
  23 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
  24 Boston, MA 02111-1307, USA.  */
  25
  26 /*** TABLE OF CONTENTS ***
  27
  28   0. General comments
  29   1. Preamble
  30   2. Emacs' internal format (emacs-utf-8) handlers
  31   3. UTF-8 handlers
  32   4. UTF-16 handlers
  33   5. Charset-base coding systems handlers
  34   6. emacs-mule (old Emacs' internal format) handlers
  35   7. ISO2022 handlers
  36   8. Shift-JIS and BIG5 handlers
  37   9. CCL handlers
  38   10. C library functions
  39   11. Emacs Lisp library functions
  40   12. Postamble
  41
  42 */
  43
  44 /*** 0. General comments ***
  45
  46
  47 CODING SYSTEM
  48
  49   A coding system is an object for an encoding mechanism that contains
  50   information about how to convert byte sequences to character
  51   sequences and vice versa.  When we say "decode", it means converting
  52   a byte sequence of a specific coding system into a character
  53   sequence that is represented by Emacs' internal coding system
  54   `emacs-utf-8', and when we say "encode", it means converting a
  55   character sequence of emacs-utf-8 to a byte sequence of a specific
  56   coding system.
  57
  58   In Emacs Lisp, a coding system is represented by a Lisp symbol.  In
  59   C level, a coding system is represented by a vector of attributes
  60   stored in the hash table Vcharset_hash_table.  The conversion from
  61   coding system symbol to attributes vector is done by looking up
  62   Vcharset_hash_table by the symbol.
  63
  64   Coding systems are classified into the following types depending on
  65   the encoding mechanism.  Here's a brief description of the types.
  66
  67   o UTF-8
  68
  69   o UTF-16
  70
  71   o Charset-base coding system
  72
  73   A coding system defined by one or more (coded) character sets.
  74   Decoding and encoding are done by a code converter defined for each
  75   character set.
  76
  77   o Old Emacs internal format (emacs-mule)
  78
  79   The coding system adopted by old versions of Emacs (20 and 21).
  80
  81   o ISO2022-base coding system
  82
  83   The most famous coding system for multiple character sets.  X's
  84   Compound Text, various EUCs (Extended Unix Code), and coding systems
  85   used in the Internet communication such as ISO-2022-JP are all
  86   variants of ISO2022.
  87
  88   o SJIS (or Shift-JIS or MS-Kanji-Code)
  89
  90   A coding system to encode character sets: ASCII, JISX0201, and
  91   JISX0208.  Widely used for PC's in Japan.  Details are described in
  92   section 8.
  93
  94   o BIG5
  95
  96   A coding system to encode character sets: ASCII and Big5.  Widely
  97   used for Chinese (mainly in Taiwan and Hong Kong).  Details are
  98   described in section 8.  In this file, when we write "big5" (all
  99   lowercase), we mean the coding system, and when we write "Big5"
 100   (capitalized), we mean the character set.
 101
 102   o CCL
 103
 104   If a user wants to decode/encode text encoded in a coding system
 105   not listed above, he can supply a decoder and an encoder for it in
 106   CCL (Code Conversion Language) programs.  Emacs executes the CCL
 107   program while decoding/encoding.
 108
 109   o Raw-text
 110
 111   A coding system for text containing raw eight-bit data.  Emacs
 112   treats each byte of source text as a character (except for
 113   end-of-line conversion).
 114
 115   o No-conversion
 116
 117   Like raw text, but don't do end-of-line conversion.
 118
 119
 120 END-OF-LINE FORMAT
 121
 122   How text end-of-line is encoded depends on operating system.  For
 123   instance, Unix's format is just one byte of LF (line-feed) code,
 124   whereas DOS's format is two-byte sequence of `carriage-return' and
 125   `line-feed' codes.  MacOS's format is usually one byte of
 126   `carriage-return'.
 127
 128   Since text character encoding and end-of-line encoding are
 129   independent, any coding system described above can take any format
 130   of end-of-line (except for no-conversion).
 131
 132 STRUCT CODING_SYSTEM
 133
 134   Before using a coding system for code conversion (i.e. decoding and
 135   encoding), we setup a structure of type `struct coding_system'.
 136   This structure keeps various information about a specific code
 137   conversion (e.g. the location of source and destination data).
 138
 139 */
 140
 141 /* COMMON MACROS */
 142
 143
 144 /*** GENERAL NOTES on `detect_coding_XXX ()' functions ***
 145
 146   These functions check if a byte sequence specified as a source in
 147   CODING conforms to the format of XXX, and update the members of
 148   DETECT_INFO.
 149
 150   Return 1 if the byte sequence conforms to XXX, otherwise return 0.
 151
 152   Below is the template of these functions.  */
 153
 154 #if 0
 155 static int
 156 detect_coding_XXX (coding, detect_info)
 157      struct coding_system *coding;
 158      struct coding_detection_info *detect_info;
 159 {
 160   unsigned char *src = coding->source;
 161   unsigned char *src_end = coding->source + coding->src_bytes;
 162   int multibytep = coding->src_multibyte;
 163   int consumed_chars = 0;
 164   int found = 0;
 165   ...;
 166
 167   while (1)
 168     {
 169       /* Get one byte from the source.  If the souce is exausted, jump
 170          to no_more_source:.  */
 171       ONE_MORE_BYTE (c);
 172
 173       if (! __C_conforms_to_XXX___ (c))
 174         break;
 175       if (! __C_strongly_suggests_XXX__ (c))
 176         found = CATEGORY_MASK_XXX;
 177     }
 178   /* The byte sequence is invalid for XXX.  */
 179   detect_info->rejected |= CATEGORY_MASK_XXX;
 180   return 0;
 181
 182  no_more_source:
 183   /* The source exausted successfully.  */
 184   detect_info->found |= found;
 185   return 1;
 186 }
 187 #endif
 188
 189 /*** GENERAL NOTES on `decode_coding_XXX ()' functions ***
 190
 191   These functions decode a byte sequence specified as a source by
 192   CODING.  The resulting multibyte text goes to a place pointed to by
 193   CODING->charbuf, the length of which should not exceed
 194   CODING->charbuf_size;
 195
 196   These functions set the information of original and decoded texts in
 197   CODING->consumed, CODING->consumed_char, and CODING->charbuf_used.
 198   They also set CODING->result to one of CODING_RESULT_XXX indicating
 199   how the decoding is finished.
 200
 201   Below is the template of these functions.  */
 202
 203 #if 0
 204 static void
 205 decode_coding_XXXX (coding)
 206      struct coding_system *coding;
 207 {
 208   unsigned char *src = coding->source + coding->consumed;
 209   unsigned char *src_end = coding->source + coding->src_bytes;
 210   /* SRC_BASE remembers the start position in source in each loop.
 211      The loop will be exited when there's not enough source code, or
 212      when there's no room in CHARBUF for a decoded character.  */
 213   unsigned char *src_base;
 214   /* A buffer to produce decoded characters.  */
 215   int *charbuf = coding->charbuf;
 216   int *charbuf_end = charbuf + coding->charbuf_size;
 217   int multibytep = coding->src_multibyte;
 218
 219   while (1)
 220     {
 221       src_base = src;
 222       if (charbuf < charbuf_end)
 223         /* No more room to produce a decoded character.  */
 224         break;
 225       ONE_MORE_BYTE (c);
 226       /* Decode it. */
 227     }
 228
 229  no_more_source:
 230   if (src_base < src_end
 231       && coding->mode & CODING_MODE_LAST_BLOCK)
 232     /* If the source ends by partial bytes to construct a character,
 233        treat them as eight-bit raw data.  */
 234     while (src_base < src_end && charbuf < charbuf_end)
 235       *charbuf++ = *src_base++;
 236   /* Remember how many bytes and characters we consumed.  If the
 237      source is multibyte, the bytes and chars are not identical.  */
 238   coding->consumed = coding->consumed_char = src_base - coding->source;
 239   /* Remember how many characters we produced.  */
 240   coding->charbuf_used = charbuf - coding->charbuf;
 241 }
 242 #endif
 243
 244 /*** GENERAL NOTES on `encode_coding_XXX ()' functions ***
 245
 246   These functions encode SRC_BYTES length text at SOURCE of Emacs'
 247   internal multibyte format by CODING.  The resulting byte sequence
 248   goes to a place pointed to by DESTINATION, the length of which
 249   should not exceed DST_BYTES.
 250
 251   These functions set the information of original and encoded texts in
 252   the members produced, produced_char, consumed, and consumed_char of
 253   the structure *CODING.  They also set the member result to one of
 254   CODING_RESULT_XXX indicating how the encoding finished.
 255
 256   DST_BYTES zero means that source area and destination area are
 257   overlapped, which means that we can produce a encoded text until it
 258   reaches at the head of not-yet-encoded source text.
 259
 260   Below is a template of these functions.  */
 261 #if 0
 262 static void
 263 encode_coding_XXX (coding)
 264      struct coding_system *coding;
 265 {
 266   int multibytep = coding->dst_multibyte;
 267   int *charbuf = coding->charbuf;
 268   int *charbuf_end = charbuf->charbuf + coding->charbuf_used;
 269   unsigned char *dst = coding->destination + coding->produced;
 270   unsigned char *dst_end = coding->destination + coding->dst_bytes;
 271   unsigned char *adjusted_dst_end = dst_end - _MAX_BYTES_PRODUCED_IN_LOOP_;
 272   int produced_chars = 0;
 273
 274   for (; charbuf < charbuf_end && dst < adjusted_dst_end; charbuf++)
 275     {
 276       int c = *charbuf;
 277       /* Encode C into DST, and increment DST.  */
 278     }
 279  label_no_more_destination:
 280   /* How many chars and bytes we produced.  */
 281   coding->produced_char += produced_chars;
 282   coding->produced = dst - coding->destination;
 283 }
 284 #endif
 285
 286 \f
 287 /*** 1. Preamble ***/
 288
 289 #include <config.h>
 290 #include <stdio.h>
 291
 292 #include "lisp.h"
 293 #include "buffer.h"
 294 #include "character.h"
 295 #include "charset.h"
 296 #include "ccl.h"
 297 #include "composite.h"
 298 #include "coding.h"
 299 #include "window.h"
 300
 301 Lisp_Object Vcoding_system_hash_table;
 302
 303 Lisp_Object Qcoding_system, Qcoding_aliases, Qeol_type;
 304 Lisp_Object Qunix, Qdos;
 305 extern Lisp_Object Qmac;        /* frame.c */
 306 Lisp_Object Qbuffer_file_coding_system;
 307 Lisp_Object Qpost_read_conversion, Qpre_write_conversion;
 308 Lisp_Object Qdefault_char;
 309 Lisp_Object Qno_conversion, Qundecided;
 310 Lisp_Object Qcharset, Qiso_2022, Qutf_8, Qutf_16, Qshift_jis, Qbig5;
 311 Lisp_Object Qbig, Qlittle;
 312 Lisp_Object Qcoding_system_history;
 313 Lisp_Object Qvalid_codes;
 314 Lisp_Object QCcategory;
 315
 316 extern Lisp_Object Qinsert_file_contents, Qwrite_region;
 317 Lisp_Object Qcall_process, Qcall_process_region, Qprocess_argument;
 318 Lisp_Object Qstart_process, Qopen_network_stream;
 319 Lisp_Object Qtarget_idx;
 320
 321 Lisp_Object Vselect_safe_coding_system_function;
 322
 323 /* Mnemonic string for each format of end-of-line.  */
 324 Lisp_Object eol_mnemonic_unix, eol_mnemonic_dos, eol_mnemonic_mac;
 325 /* Mnemonic string to indicate format of end-of-line is not yet
 326    decided.  */
 327 Lisp_Object eol_mnemonic_undecided;
 328
 329 #ifdef emacs
 330
 331 Lisp_Object Vcoding_system_list, Vcoding_system_alist;
 332
 333 Lisp_Object Qcoding_system_p, Qcoding_system_error;
 334
 335 /* Coding system emacs-mule and raw-text are for converting only
 336    end-of-line format.  */
 337 Lisp_Object Qemacs_mule, Qraw_text;
 338
 339 /* Coding-systems are handed between Emacs Lisp programs and C internal
 340    routines by the following three variables.  */
 341 /* Coding-system for reading files and receiving data from process.  */
 342 Lisp_Object Vcoding_system_for_read;
 343 /* Coding-system for writing files and sending data to process.  */
 344 Lisp_Object Vcoding_system_for_write;
 345 /* Coding-system actually used in the latest I/O.  */
 346 Lisp_Object Vlast_coding_system_used;
 347
 348 /* A vector of length 256 which contains information about special
 349    Latin codes (especially for dealing with Microsoft codes).  */
 350 Lisp_Object Vlatin_extra_code_table;
 351
 352 /* Flag to inhibit code conversion of end-of-line format.  */
 353 int inhibit_eol_conversion;
 354
 355 /* Flag to inhibit ISO2022 escape sequence detection.  */
 356 int inhibit_iso_escape_detection;
 357
 358 /* Flag to make buffer-file-coding-system inherit from process-coding.  */
 359 int inherit_process_coding_system;
 360
 361 /* Coding system to be used to encode text for terminal display.  */
 362 struct coding_system terminal_coding;
 363
 364 /* Coding system to be used to encode text for terminal display when
 365    terminal coding system is nil.  */
 366 struct coding_system safe_terminal_coding;
 367
 368 /* Coding system of what is sent from terminal keyboard.  */
 369 struct coding_system keyboard_coding;
 370
 371 Lisp_Object Vfile_coding_system_alist;
 372 Lisp_Object Vprocess_coding_system_alist;
 373 Lisp_Object Vnetwork_coding_system_alist;
 374
 375 Lisp_Object Vlocale_coding_system;
 376
 377 #endif /* emacs */
 378
 379 /* Flag to tell if we look up translation table on character code
 380    conversion.  */
 381 Lisp_Object Venable_character_translation;
 382 /* Standard translation table to look up on decoding (reading).  */
 383 Lisp_Object Vstandard_translation_table_for_decode;
 384 /* Standard translation table to look up on encoding (writing).  */
 385 Lisp_Object Vstandard_translation_table_for_encode;
 386
 387 Lisp_Object Qtranslation_table;
 388 Lisp_Object Qtranslation_table_id;
 389 Lisp_Object Qtranslation_table_for_decode;
 390 Lisp_Object Qtranslation_table_for_encode;
 391
 392 /* Alist of charsets vs revision number.  */
 393 static Lisp_Object Vcharset_revision_table;
 394
 395 /* Default coding systems used for process I/O.  */
 396 Lisp_Object Vdefault_process_coding_system;
 397
 398 /* Global flag to tell that we can't call post-read-conversion and
 399    pre-write-conversion functions.  Usually the value is zero, but it
 400    is set to 1 temporarily while such functions are running.  This is
 401    to avoid infinite recursive call.  */
 402 static int inhibit_pre_post_conversion;
 403
 404 /* Two special coding systems.  */
 405 Lisp_Object Vsjis_coding_system;
 406 Lisp_Object Vbig5_coding_system;
 407
 408
 409 static int detect_coding_utf_8 P_ ((struct coding_system *,
 410                                     struct coding_detection_info *info));
 411 static void decode_coding_utf_8 P_ ((struct coding_system *));
 412 static int encode_coding_utf_8 P_ ((struct coding_system *));
 413
 414 static int detect_coding_utf_16 P_ ((struct coding_system *,
 415                                      struct coding_detection_info *info));
 416 static void decode_coding_utf_16 P_ ((struct coding_system *));
 417 static int encode_coding_utf_16 P_ ((struct coding_system *));
 418
 419 static int detect_coding_iso_2022 P_ ((struct coding_system *,
 420                                        struct coding_detection_info *info));
 421 static void decode_coding_iso_2022 P_ ((struct coding_system *));
 422 static int encode_coding_iso_2022 P_ ((struct coding_system *));
 423
 424 static int detect_coding_emacs_mule P_ ((struct coding_system *,
 425                                          struct coding_detection_info *info));
 426 static void decode_coding_emacs_mule P_ ((struct coding_system *));
 427 static int encode_coding_emacs_mule P_ ((struct coding_system *));
 428
 429 static int detect_coding_sjis P_ ((struct coding_system *,
 430                                    struct coding_detection_info *info));
 431 static void decode_coding_sjis P_ ((struct coding_system *));
 432 static int encode_coding_sjis P_ ((struct coding_system *));
 433
 434 static int detect_coding_big5 P_ ((struct coding_system *,
 435                                    struct coding_detection_info *info));
 436 static void decode_coding_big5 P_ ((struct coding_system *));
 437 static int encode_coding_big5 P_ ((struct coding_system *));
 438
 439 static int detect_coding_ccl P_ ((struct coding_system *,
 440                                   struct coding_detection_info *info));
 441 static void decode_coding_ccl P_ ((struct coding_system *));
 442 static int encode_coding_ccl P_ ((struct coding_system *));
 443
 444 static void decode_coding_raw_text P_ ((struct coding_system *));
 445 static int encode_coding_raw_text P_ ((struct coding_system *));
 446
 447
 448 /* ISO2022 section */
 449
 450 #define CODING_ISO_INITIAL(coding, reg)                 \
 451   (XINT (AREF (AREF (CODING_ID_ATTRS ((coding)->id),    \
 452                      coding_attr_iso_initial),          \
 453                reg)))
 454
 455
 456 #define CODING_ISO_REQUEST(coding, charset_id)  \
 457   ((charset_id <= (coding)->max_charset_id      \
 458     ? (coding)->safe_charsets[charset_id]       \
 459     : -1))
 460
 461
 462 #define CODING_ISO_FLAGS(coding)        \
 463   ((coding)->spec.iso_2022.flags)
 464 #define CODING_ISO_DESIGNATION(coding, reg)     \
 465   ((coding)->spec.iso_2022.current_designation[reg])
 466 #define CODING_ISO_INVOCATION(coding, plane)    \
 467   ((coding)->spec.iso_2022.current_invocation[plane])
 468 #define CODING_ISO_SINGLE_SHIFTING(coding)      \
 469   ((coding)->spec.iso_2022.single_shifting)
 470 #define CODING_ISO_BOL(coding)  \
 471   ((coding)->spec.iso_2022.bol)
 472 #define CODING_ISO_INVOKED_CHARSET(coding, plane)       \
 473   CODING_ISO_DESIGNATION ((coding), CODING_ISO_INVOCATION ((coding), (plane)))
 474
 475 /* Control characters of ISO2022.  */
 476                         /* code */      /* function */
 477 #define ISO_CODE_LF     0x0A            /* line-feed */
 478 #define ISO_CODE_CR     0x0D            /* carriage-return */
 479 #define ISO_CODE_SO     0x0E            /* shift-out */
 480 #define ISO_CODE_SI     0x0F            /* shift-in */
 481 #define ISO_CODE_SS2_7  0x19            /* single-shift-2 for 7-bit code */
 482 #define ISO_CODE_ESC    0x1B            /* escape */
 483 #define ISO_CODE_SS2    0x8E            /* single-shift-2 */
 484 #define ISO_CODE_SS3    0x8F            /* single-shift-3 */
 485 #define ISO_CODE_CSI    0x9B            /* control-sequence-introducer */
 486
 487 /* All code (1-byte) of ISO2022 is classified into one of the
 488    followings.  */
 489 enum iso_code_class_type
 490   {
 491     ISO_control_0,              /* Control codes in the range
 492                                    0x00..0x1F and 0x7F, except for the
 493                                    following 5 codes.  */
 494     ISO_carriage_return,        /* ISO_CODE_CR (0x0D) */
 495     ISO_shift_out,              /* ISO_CODE_SO (0x0E) */
 496     ISO_shift_in,               /* ISO_CODE_SI (0x0F) */
 497     ISO_single_shift_2_7,       /* ISO_CODE_SS2_7 (0x19) */
 498     ISO_escape,                 /* ISO_CODE_SO (0x1B) */
 499     ISO_control_1,              /* Control codes in the range
 500                                    0x80..0x9F, except for the
 501                                    following 3 codes.  */
 502     ISO_single_shift_2,         /* ISO_CODE_SS2 (0x8E) */
 503     ISO_single_shift_3,         /* ISO_CODE_SS3 (0x8F) */
 504     ISO_control_sequence_introducer, /* ISO_CODE_CSI (0x9B) */
 505     ISO_0x20_or_0x7F,           /* Codes of the values 0x20 or 0x7F.  */
 506     ISO_graphic_plane_0,        /* Graphic codes in the range 0x21..0x7E.  */
 507     ISO_0xA0_or_0xFF,           /* Codes of the values 0xA0 or 0xFF.  */
 508     ISO_graphic_plane_1         /* Graphic codes in the range 0xA1..0xFE.  */
 509   };
 510
 511 /** The macros CODING_ISO_FLAG_XXX defines a flag bit of the
 512     `iso-flags' attribute of an iso2022 coding system.  */
 513
 514 /* If set, produce long-form designation sequence (e.g. ESC $ ( A)
 515    instead of the correct short-form sequence (e.g. ESC $ A).  */
 516 #define CODING_ISO_FLAG_LONG_FORM       0x0001
 517
 518 /* If set, reset graphic planes and registers at end-of-line to the
 519    initial state.  */
 520 #define CODING_ISO_FLAG_RESET_AT_EOL    0x0002
 521
 522 /* If set, reset graphic planes and registers before any control
 523    characters to the initial state.  */
 524 #define CODING_ISO_FLAG_RESET_AT_CNTL   0x0004
 525
 526 /* If set, encode by 7-bit environment.  */
 527 #define CODING_ISO_FLAG_SEVEN_BITS      0x0008
 528
 529 /* If set, use locking-shift function.  */
 530 #define CODING_ISO_FLAG_LOCKING_SHIFT   0x0010
 531
 532 /* If set, use single-shift function.  Overwrite
 533    CODING_ISO_FLAG_LOCKING_SHIFT.  */
 534 #define CODING_ISO_FLAG_SINGLE_SHIFT    0x0020
 535
 536 /* If set, use designation escape sequence.  */
 537 #define CODING_ISO_FLAG_DESIGNATION     0x0040
 538
 539 /* If set, produce revision number sequence.  */
 540 #define CODING_ISO_FLAG_REVISION        0x0080
 541
 542 /* If set, produce ISO6429's direction specifying sequence.  */
 543 #define CODING_ISO_FLAG_DIRECTION       0x0100
 544
 545 /* If set, assume designation states are reset at beginning of line on
 546    output.  */
 547 #define CODING_ISO_FLAG_INIT_AT_BOL     0x0200
 548
 549 /* If set, designation sequence should be placed at beginning of line
 550    on output.  */
 551 #define CODING_ISO_FLAG_DESIGNATE_AT_BOL 0x0400
 552
 553 /* If set, do not encode unsafe charactes on output.  */
 554 #define CODING_ISO_FLAG_SAFE            0x0800
 555
 556 /* If set, extra latin codes (128..159) are accepted as a valid code
 557    on input.  */
 558 #define CODING_ISO_FLAG_LATIN_EXTRA     0x1000
 559
 560 #define CODING_ISO_FLAG_COMPOSITION     0x2000
 561
 562 #define CODING_ISO_FLAG_EUC_TW_SHIFT    0x4000
 563
 564 #define CODING_ISO_FLAG_USE_ROMAN       0x8000
 565
 566 #define CODING_ISO_FLAG_USE_OLDJIS      0x10000
 567
 568 #define CODING_ISO_FLAG_FULL_SUPPORT    0x100000
 569
 570 /* A character to be produced on output if encoding of the original
 571    character is prohibited by CODING_ISO_FLAG_SAFE.  */
 572 #define CODING_INHIBIT_CHARACTER_SUBSTITUTION  '?'
 573
 574
 575 /* UTF-16 section */
 576 #define CODING_UTF_16_BOM(coding)       \
 577   ((coding)->spec.utf_16.bom)
 578
 579 #define CODING_UTF_16_ENDIAN(coding)    \
 580   ((coding)->spec.utf_16.endian)
 581
 582 #define CODING_UTF_16_SURROGATE(coding) \
 583   ((coding)->spec.utf_16.surrogate)
 584
 585
 586 /* CCL section */
 587 #define CODING_CCL_DECODER(coding)      \
 588   AREF (CODING_ID_ATTRS ((coding)->id), coding_attr_ccl_decoder)
 589 #define CODING_CCL_ENCODER(coding)      \
 590   AREF (CODING_ID_ATTRS ((coding)->id), coding_attr_ccl_encoder)
 591 #define CODING_CCL_VALIDS(coding)                                          \
 592   (XSTRING (AREF (CODING_ID_ATTRS ((coding)->id), coding_attr_ccl_valids)) \
 593    ->data)
 594
 595 /* Index for each coding category in `coding_categories' */
 596
 597 enum coding_category
 598   {
 599     coding_category_iso_7,
 600     coding_category_iso_7_tight,
 601     coding_category_iso_8_1,
 602     coding_category_iso_8_2,
 603     coding_category_iso_7_else,
 604     coding_category_iso_8_else,
 605     coding_category_utf_8,
 606     coding_category_utf_16_auto,
 607     coding_category_utf_16_be,
 608     coding_category_utf_16_le,
 609     coding_category_utf_16_be_nosig,
 610     coding_category_utf_16_le_nosig,
 611     coding_category_charset,
 612     coding_category_sjis,
 613     coding_category_big5,
 614     coding_category_ccl,
 615     coding_category_emacs_mule,
 616     /* All above are targets of code detection.  */
 617     coding_category_raw_text,
 618     coding_category_undecided,
 619     coding_category_max
 620   };
 621
 622 /* Definitions of flag bits used in detect_coding_XXXX.  */
 623 #define CATEGORY_MASK_ISO_7             (1 << coding_category_iso_7)
 624 #define CATEGORY_MASK_ISO_7_TIGHT       (1 << coding_category_iso_7_tight)
 625 #define CATEGORY_MASK_ISO_8_1           (1 << coding_category_iso_8_1)
 626 #define CATEGORY_MASK_ISO_8_2           (1 << coding_category_iso_8_2)
 627 #define CATEGORY_MASK_ISO_7_ELSE        (1 << coding_category_iso_7_else)
 628 #define CATEGORY_MASK_ISO_8_ELSE        (1 << coding_category_iso_8_else)
 629 #define CATEGORY_MASK_UTF_8             (1 << coding_category_utf_8)
 630 #define CATEGORY_MASK_UTF_16_AUTO       (1 << coding_category_utf_16_auto)
 631 #define CATEGORY_MASK_UTF_16_BE         (1 << coding_category_utf_16_be)
 632 #define CATEGORY_MASK_UTF_16_LE         (1 << coding_category_utf_16_le)
 633 #define CATEGORY_MASK_UTF_16_BE_NOSIG   (1 << coding_category_utf_16_be_nosig)
 634 #define CATEGORY_MASK_UTF_16_LE_NOSIG   (1 << coding_category_utf_16_le_nosig)
 635 #define CATEGORY_MASK_CHARSET           (1 << coding_category_charset)
 636 #define CATEGORY_MASK_SJIS              (1 << coding_category_sjis)
 637 #define CATEGORY_MASK_BIG5              (1 << coding_category_big5)
 638 #define CATEGORY_MASK_CCL               (1 << coding_category_ccl)
 639 #define CATEGORY_MASK_EMACS_MULE        (1 << coding_category_emacs_mule)
 640 #define CATEGORY_MASK_RAW_TEXT          (1 << coding_category_raw_text)
 641
 642 /* This value is returned if detect_coding_mask () find nothing other
 643    than ASCII characters.  */
 644 #define CATEGORY_MASK_ANY               \
 645   (CATEGORY_MASK_ISO_7                  \
 646    | CATEGORY_MASK_ISO_7_TIGHT          \
 647    | CATEGORY_MASK_ISO_8_1              \
 648    | CATEGORY_MASK_ISO_8_2              \
 649    | CATEGORY_MASK_ISO_7_ELSE           \
 650    | CATEGORY_MASK_ISO_8_ELSE           \
 651    | CATEGORY_MASK_UTF_8                \
 652    | CATEGORY_MASK_UTF_16_BE            \
 653    | CATEGORY_MASK_UTF_16_LE            \
 654    | CATEGORY_MASK_UTF_16_BE_NOSIG      \
 655    | CATEGORY_MASK_UTF_16_LE_NOSIG      \
 656    | CATEGORY_MASK_CHARSET              \
 657    | CATEGORY_MASK_SJIS                 \
 658    | CATEGORY_MASK_BIG5                 \
 659    | CATEGORY_MASK_CCL                  \
 660    | CATEGORY_MASK_EMACS_MULE)
 661
 662
 663 #define CATEGORY_MASK_ISO_7BIT \
 664   (CATEGORY_MASK_ISO_7 | CATEGORY_MASK_ISO_7_TIGHT)
 665
 666 #define CATEGORY_MASK_ISO_8BIT \
 667   (CATEGORY_MASK_ISO_8_1 | CATEGORY_MASK_ISO_8_2)
 668
 669 #define CATEGORY_MASK_ISO_ELSE \
 670   (CATEGORY_MASK_ISO_7_ELSE | CATEGORY_MASK_ISO_8_ELSE)
 671
 672 #define CATEGORY_MASK_ISO_ESCAPE        \
 673   (CATEGORY_MASK_ISO_7                  \
 674    | CATEGORY_MASK_ISO_7_TIGHT          \
 675    | CATEGORY_MASK_ISO_7_ELSE           \
 676    | CATEGORY_MASK_ISO_8_ELSE)
 677
 678 #define CATEGORY_MASK_ISO       \
 679   (  CATEGORY_MASK_ISO_7BIT     \
 680      | CATEGORY_MASK_ISO_8BIT   \
 681      | CATEGORY_MASK_ISO_ELSE)
 682
 683 #define CATEGORY_MASK_UTF_16            \
 684   (CATEGORY_MASK_UTF_16_BE              \
 685    | CATEGORY_MASK_UTF_16_LE            \
 686    | CATEGORY_MASK_UTF_16_BE_NOSIG      \
 687    | CATEGORY_MASK_UTF_16_LE_NOSIG)
 688
 689
 690 /* List of symbols `coding-category-xxx' ordered by priority.  This
 691    variable is exposed to Emacs Lisp.  */
 692 static Lisp_Object Vcoding_category_list;
 693
 694 /* Table of coding categories (Lisp symbols).  This variable is for
 695    internal use oly.  */
 696 static Lisp_Object Vcoding_category_table;
 697
 698 /* Table of coding-categories ordered by priority.  */
 699 static enum coding_category coding_priorities[coding_category_max];
 700
 701 /* Nth element is a coding context for the coding system bound to the
 702    Nth coding category.  */
 703 static struct coding_system coding_categories[coding_category_max];
 704
 705 /*** Commonly used macros and functions ***/
 706
 707 #ifndef min
 708 #define min(a, b) ((a) < (b) ? (a) : (b))
 709 #endif
 710 #ifndef max
 711 #define max(a, b) ((a) > (b) ? (a) : (b))
 712 #endif
 713
 714 #define CODING_GET_INFO(coding, attrs, eol_type, charset_list)  \
 715   do {                                                          \
 716     attrs = CODING_ID_ATTRS (coding->id);                       \
 717     eol_type = CODING_ID_EOL_TYPE (coding->id);                 \
 718     if (VECTORP (eol_type))                                     \
 719       eol_type = Qunix;                                         \
 720     charset_list = CODING_ATTR_CHARSET_LIST (attrs);            \
 721   } while (0)
 722
 723
 724 /* Safely get one byte from the source text pointed by SRC which ends
 725    at SRC_END, and set C to that byte.  If there are not enough bytes
 726    in the source, it jumps to `no_more_source'.  The caller
 727    should declare and set these variables appropriately in advance:
 728         src, src_end, multibytep
 729 */
 730
 731 #define ONE_MORE_BYTE(c)                                        \
 732   do {                                                          \
 733     if (src == src_end)                                         \
 734       {                                                         \
 735         if (src_base < src)                                     \
 736           coding->result = CODING_RESULT_INSUFFICIENT_SRC;      \
 737         goto no_more_source;                                    \
 738       }                                                         \
 739     c = *src++;                                                 \
 740     if (multibytep && (c & 0x80))                               \
 741       {                                                         \
 742         if ((c & 0xFE) != 0xC0)                                 \
 743           error ("Undecodable char found");                     \
 744         c = ((c & 1) << 6) | *src++;                            \
 745       }                                                         \
 746     consumed_chars++;                                           \
 747   } while (0)
 748
 749
 750 #define ONE_MORE_BYTE_NO_CHECK(c)               \
 751   do {                                          \
 752     c = *src++;                                 \
 753     if (multibytep && (c & 0x80))               \
 754       {                                         \
 755         if ((c & 0xFE) != 0xC0)                 \
 756           error ("Undecodable char found");     \
 757         c = ((c & 1) << 6) | *src++;            \
 758       }                                         \
 759     consumed_chars++;                           \
 760   } while (0)
 761
 762
 763 /* Store a byte C in the place pointed by DST and increment DST to the
 764    next free point, and increment PRODUCED_CHARS.  The caller should
 765    assure that C is 0..127, and declare and set the variable `dst'
 766    appropriately in advance.
 767 */
 768
 769
 770 #define EMIT_ONE_ASCII_BYTE(c)  \
 771   do {                          \
 772     produced_chars++;           \
 773     *dst++ = (c);               \
 774   } while (0)
 775
 776
 777 /* Like EMIT_ONE_ASCII_BYTE byt store two bytes; C1 and C2.  */
 778
 779 #define EMIT_TWO_ASCII_BYTES(c1, c2)    \
 780   do {                                  \
 781     produced_chars += 2;                \
 782     *dst++ = (c1), *dst++ = (c2);       \
 783   } while (0)
 784
 785
 786 /* Store a byte C in the place pointed by DST and increment DST to the
 787    next free point, and increment PRODUCED_CHARS.  If MULTIBYTEP is
 788    nonzero, store in an appropriate multibyte from.  The caller should
 789    declare and set the variables `dst' and `multibytep' appropriately
 790    in advance.  */
 791
 792 #define EMIT_ONE_BYTE(c)                \
 793   do {                                  \
 794     produced_chars++;                   \
 795     if (multibytep)                     \
 796       {                                 \
 797         int ch = (c);                   \
 798         if (ch >= 0x80)                 \
 799           ch = BYTE8_TO_CHAR (ch);      \
 800         CHAR_STRING_ADVANCE (ch, dst);  \
 801       }                                 \
 802     else                                \
 803       *dst++ = (c);                     \
 804   } while (0)
 805
 806
 807 /* Like EMIT_ONE_BYTE, but emit two bytes; C1 and C2.  */
 808
 809 #define EMIT_TWO_BYTES(c1, c2)          \
 810   do {                                  \
 811     produced_chars += 2;                \
 812     if (multibytep)                     \
 813       {                                 \
 814         int ch;                         \
 815                                         \
 816         ch = (c1);                      \
 817         if (ch >= 0x80)                 \
 818           ch = BYTE8_TO_CHAR (ch);      \
 819         CHAR_STRING_ADVANCE (ch, dst);  \
 820         ch = (c2);                      \
 821         if (ch >= 0x80)                 \
 822           ch = BYTE8_TO_CHAR (ch);      \
 823         CHAR_STRING_ADVANCE (ch, dst);  \
 824       }                                 \
 825     else                                \
 826       {                                 \
 827         *dst++ = (c1);                  \
 828         *dst++ = (c2);                  \
 829       }                                 \
 830   } while (0)
 831
 832
 833 #define EMIT_THREE_BYTES(c1, c2, c3)    \
 834   do {                                  \
 835     EMIT_ONE_BYTE (c1);                 \
 836     EMIT_TWO_BYTES (c2, c3);            \
 837   } while (0)
 838
 839
 840 #define EMIT_FOUR_BYTES(c1, c2, c3, c4)         \
 841   do {                                          \
 842     EMIT_TWO_BYTES (c1, c2);                    \
 843     EMIT_TWO_BYTES (c3, c4);                    \
 844   } while (0)
 845
 846
 847 #define CODING_DECODE_CHAR(coding, src, src_base, src_end, charset, code, c) \
 848   do {                                                                       \
 849     charset_map_loaded = 0;                                                  \
 850     c = DECODE_CHAR (charset, code);                                         \
 851     if (charset_map_loaded)                                                  \
 852       {                                                                      \
 853         unsigned char *orig = coding->source;                                \
 854         EMACS_INT offset;                                                    \
 855                                                                              \
 856         coding_set_source (coding);                                          \
 857         offset = coding->source - orig;                                      \
 858         src += offset;                                                       \
 859         src_base += offset;                                                  \
 860         src_end += offset;                                                   \
 861       }                                                                      \
 862   } while (0)
 863
 864
 865 #define ASSURE_DESTINATION(bytes)                               \
 866   do {                                                          \
 867     if (dst + (bytes) >= dst_end)                               \
 868       {                                                         \
 869         int more_bytes = charbuf_end - charbuf + (bytes);       \
 870                                                                 \
 871         dst = alloc_destination (coding, more_bytes, dst);      \
 872         dst_end = coding->destination + coding->dst_bytes;      \
 873       }                                                         \
 874   } while (0)
 875
 876
 877
 878 static void
 879 coding_set_source (coding)
 880      struct coding_system *coding;
 881 {
 882   if (BUFFERP (coding->src_object))
 883     {
 884       struct buffer *buf = XBUFFER (coding->src_object);
 885
 886       if (coding->src_pos < 0)
 887         coding->source = BUF_GAP_END_ADDR (buf) + coding->src_pos_byte;
 888       else
 889         coding->source = BUF_BYTE_ADDRESS (buf, coding->src_pos_byte);
 890     }
 891   else if (STRINGP (coding->src_object))
 892     {
 893       coding->source = (XSTRING (coding->src_object)->data
 894                         + coding->src_pos_byte);
 895     }
 896   else
 897     /* Otherwise, the source is C string and is never relocated
 898        automatically.  Thus we don't have to update anything.  */
 899     ;
 900 }
 901
 902 static void
 903 coding_set_destination (coding)
 904      struct coding_system *coding;
 905 {
 906   if (BUFFERP (coding->dst_object))
 907     {
 908       if (coding->src_pos < 0)
 909         {
 910           coding->destination = BEG_ADDR + coding->dst_pos_byte - 1;
 911           coding->dst_bytes = (GAP_END_ADDR
 912                                - (coding->src_bytes - coding->consumed)
 913                                - coding->destination);
 914         }
 915       else
 916         {
 917           /* We are sure that coding->dst_pos_byte is before the gap
 918              of the buffer. */
 919           coding->destination = (BUF_BEG_ADDR (XBUFFER (coding->dst_object))
 920                                  + coding->dst_pos_byte - 1);
 921           coding->dst_bytes = (BUF_GAP_END_ADDR (XBUFFER (coding->dst_object))
 922                                - coding->destination);
 923         }
 924     }
 925   else
 926     /* Otherwise, the destination is C string and is never relocated
 927        automatically.  Thus we don't have to update anything.  */
 928     ;
 929 }
 930
 931
 932 static void
 933 coding_alloc_by_realloc (coding, bytes)
 934      struct coding_system *coding;
 935      EMACS_INT bytes;
 936 {
 937   coding->destination = (unsigned char *) xrealloc (coding->destination,
 938                                                     coding->dst_bytes + bytes);
 939   coding->dst_bytes += bytes;
 940 }
 941
 942 static void
 943 coding_alloc_by_making_gap (coding, bytes)
 944      struct coding_system *coding;
 945      EMACS_INT bytes;
 946 {
 947   if (BUFFERP (coding->dst_object)
 948       && EQ (coding->src_object, coding->dst_object))
 949     {
 950       EMACS_INT add = coding->src_bytes - coding->consumed;
 951
 952       GAP_SIZE -= add; ZV += add; Z += add; ZV_BYTE += add; Z_BYTE += add;
 953       make_gap (bytes);
 954       GAP_SIZE += add; ZV -= add; Z -= add; ZV_BYTE -= add; Z_BYTE -= add;
 955     }
 956   else
 957     {
 958       Lisp_Object this_buffer;
 959
 960       this_buffer = Fcurrent_buffer ();
 961       set_buffer_internal (XBUFFER (coding->dst_object));
 962       make_gap (bytes);
 963       set_buffer_internal (XBUFFER (this_buffer));
 964     }
 965 }
 966
 967
 968 static unsigned char *
 969 alloc_destination (coding, nbytes, dst)
 970      struct coding_system *coding;
 971      int nbytes;
 972      unsigned char *dst;
 973 {
 974   EMACS_INT offset = dst - coding->destination;
 975
 976   if (BUFFERP (coding->dst_object))
 977     coding_alloc_by_making_gap (coding, nbytes);
 978   else
 979     coding_alloc_by_realloc (coding, nbytes);
 980   coding->result = CODING_RESULT_SUCCESS;
 981   coding_set_destination (coding);
 982   dst = coding->destination + offset;
 983   return dst;
 984 }
 985
 986 /** Macros for annotations.  */
 987
 988 /* Maximum length of annotation data (sum of annotations for
 989    composition and charset).  */
 990 #define MAX_ANNOTATION_LENGTH (5 + (MAX_COMPOSITION_COMPONENTS * 2) - 1 + 5)
 991
 992 /* An annotation data is stored in the array coding->charbuf in this
 993    format:
 994      [ -LENGTH ANNOTATION_MASK FROM TO ... ]
 995    LENGTH is the number of elements in the annotation.
 996    ANNOTATION_MASK is one of CODING_ANNOTATE_XXX_MASK.
 997    FROM and TO specify the range of text annotated.  They are relative
 998    to coding->src_pos (on encoding) or coding->dst_pos (on decoding).
 999
1000    The format of the following elements depend on ANNOTATION_MASK.
1001
1002    In the case of CODING_ANNOTATE_COMPOSITION_MASK, these elements
1003    follows:
1004      ... METHOD [ COMPOSITION-COMPONENTS ... ]
1005    METHOD is one of enum composition_method.
1006    Optionnal COMPOSITION-COMPONENTS are characters and composition
1007    rules.
1008
1009    In the case of CODING_ANNOTATE_CHARSET_MASK, one element CHARSET-ID
1010    follows.  */
1011
1012 #define ADD_ANNOTATION_DATA(buf, len, mask, from, to)   \
1013   do {                                                  \
1014     *(buf)++ = -(len);                                  \
1015     *(buf)++ = (mask);                                  \
1016     *(buf)++ = (from);                                  \
1017     *(buf)++ = (to);                                    \
1018     coding->annotated = 1;                              \
1019   } while (0);
1020
1021 #define ADD_COMPOSITION_DATA(buf, from, to, method)                           \
1022   do {                                                                        \
1023     ADD_ANNOTATION_DATA (buf, 5, CODING_ANNOTATE_COMPOSITION_MASK, from, to); \
1024     *buf++ = method;                                                          \
1025   } while (0)
1026
1027
1028 #define ADD_CHARSET_DATA(buf, from, to, id)                               \
1029   do {                                                                    \
1030     ADD_ANNOTATION_DATA (buf, 5, CODING_ANNOTATE_CHARSET_MASK, from, to); \
1031     *buf++ = id;                                                          \
1032   } while (0)
1033
1034 \f
1035 /*** 2. Emacs' internal format (emacs-utf-8) ***/
1036
1037
1038
1039 \f
1040 /*** 3. UTF-8 ***/
1041
1042 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
1043    Check if a text is encoded in UTF-8.  If it is, return 1, else
1044    return 0.  */
1045
1046 #define UTF_8_1_OCTET_P(c)         ((c) < 0x80)
1047 #define UTF_8_EXTRA_OCTET_P(c)     (((c) & 0xC0) == 0x80)
1048 #define UTF_8_2_OCTET_LEADING_P(c) (((c) & 0xE0) == 0xC0)
1049 #define UTF_8_3_OCTET_LEADING_P(c) (((c) & 0xF0) == 0xE0)
1050 #define UTF_8_4_OCTET_LEADING_P(c) (((c) & 0xF8) == 0xF0)
1051 #define UTF_8_5_OCTET_LEADING_P(c) (((c) & 0xFC) == 0xF8)
1052
1053 static int
1054 detect_coding_utf_8 (coding, detect_info)
1055      struct coding_system *coding;
1056      struct coding_detection_info *detect_info;
1057 {
1058   unsigned char *src = coding->source, *src_base = src;
1059   unsigned char *src_end = coding->source + coding->src_bytes;
1060   int multibytep = coding->src_multibyte;
1061   int consumed_chars = 0;
1062   int found = 0;
1063   int incomplete;
1064
1065   detect_info->checked |= CATEGORY_MASK_UTF_8;
1066   /* A coding system of this category is always ASCII compatible.  */
1067   src += coding->head_ascii;
1068
1069   while (1)
1070     {
1071       int c, c1, c2, c3, c4;
1072
1073       incomplete = 0;
1074       ONE_MORE_BYTE (c);
1075       if (UTF_8_1_OCTET_P (c))
1076         continue;
1077       incomplete = 1;
1078       ONE_MORE_BYTE (c1);
1079       if (! UTF_8_EXTRA_OCTET_P (c1))
1080         break;
1081       if (UTF_8_2_OCTET_LEADING_P (c))
1082         {
1083           found = CATEGORY_MASK_UTF_8;
1084           continue;
1085         }
1086       ONE_MORE_BYTE (c2);
1087       if (! UTF_8_EXTRA_OCTET_P (c2))
1088         break;
1089       if (UTF_8_3_OCTET_LEADING_P (c))
1090         {
1091           found = CATEGORY_MASK_UTF_8;
1092           continue;
1093         }
1094       ONE_MORE_BYTE (c3);
1095       if (! UTF_8_EXTRA_OCTET_P (c3))
1096         break;
1097       if (UTF_8_4_OCTET_LEADING_P (c))
1098         {
1099           found = CATEGORY_MASK_UTF_8;
1100           continue;
1101         }
1102       ONE_MORE_BYTE (c4);
1103       if (! UTF_8_EXTRA_OCTET_P (c4))
1104         break;
1105       if (UTF_8_5_OCTET_LEADING_P (c))
1106         {
1107           found = CATEGORY_MASK_UTF_8;
1108           continue;
1109         }
1110       break;
1111     }
1112   detect_info->rejected |= CATEGORY_MASK_UTF_8;
1113   return 0;
1114
1115  no_more_source:
1116   if (incomplete && coding->mode & CODING_MODE_LAST_BLOCK)
1117     {
1118       detect_info->rejected |= CATEGORY_MASK_UTF_8;
1119       return 0;
1120     }
1121   detect_info->found |= found;
1122   return 1;
1123 }
1124
1125
1126 static void
1127 decode_coding_utf_8 (coding)
1128      struct coding_system *coding;
1129 {
1130   unsigned char *src = coding->source + coding->consumed;
1131   unsigned char *src_end = coding->source + coding->src_bytes;
1132   unsigned char *src_base;
1133   int *charbuf = coding->charbuf;
1134   int *charbuf_end = charbuf + coding->charbuf_size;
1135   int consumed_chars = 0, consumed_chars_base;
1136   int multibytep = coding->src_multibyte;
1137   Lisp_Object attr, eol_type, charset_list;
1138
1139   CODING_GET_INFO (coding, attr, eol_type, charset_list);
1140
1141   while (1)
1142     {
1143       int c, c1, c2, c3, c4, c5;
1144
1145       src_base = src;
1146       consumed_chars_base = consumed_chars;
1147
1148       if (charbuf >= charbuf_end)
1149         break;
1150
1151       ONE_MORE_BYTE (c1);
1152       if (UTF_8_1_OCTET_P(c1))
1153         {
1154           c = c1;
1155           if (c == '\r')
1156             {
1157               if (EQ (eol_type, Qdos))
1158                 {
1159                   if (src == src_end)
1160                     {
1161                       coding->result = CODING_RESULT_INSUFFICIENT_SRC;
1162                       goto no_more_source;
1163                     }
1164                   if (*src == '\n')
1165                     ONE_MORE_BYTE (c);
1166                 }
1167               else if (EQ (eol_type, Qmac))
1168                 c = '\n';
1169             }
1170         }
1171       else
1172         {
1173           ONE_MORE_BYTE (c2);
1174           if (! UTF_8_EXTRA_OCTET_P (c2))
1175             goto invalid_code;
1176           if (UTF_8_2_OCTET_LEADING_P (c1))
1177             {
1178               c = ((c1 & 0x1F) << 6) | (c2 & 0x3F);
1179               /* Reject overlong sequences here and below.  Encoders
1180                  producing them are incorrect, they can be misleading,
1181                  and they mess up read/write invariance.  */
1182               if (c < 128)
1183                 goto invalid_code;
1184             }
1185           else
1186             {
1187               ONE_MORE_BYTE (c3);
1188               if (! UTF_8_EXTRA_OCTET_P (c3))
1189                 goto invalid_code;
1190               if (UTF_8_3_OCTET_LEADING_P (c1))
1191                 {
1192                   c = (((c1 & 0xF) << 12)
1193                        | ((c2 & 0x3F) << 6) | (c3 & 0x3F));
1194                   if (c < 0x800
1195                       || (c >= 0xd800 && c < 0xe000)) /* surrogates (invalid) */
1196                     goto invalid_code;
1197                 }
1198               else
1199                 {
1200                   ONE_MORE_BYTE (c4);
1201                   if (! UTF_8_EXTRA_OCTET_P (c4))
1202                     goto invalid_code;
1203                   if (UTF_8_4_OCTET_LEADING_P (c1))
1204                     {
1205                     c = (((c1 & 0x7) << 18) | ((c2 & 0x3F) << 12)
1206                          | ((c3 & 0x3F) << 6) | (c4 & 0x3F));
1207                     if (c < 0x10000)
1208                       goto invalid_code;
1209                     }
1210                   else
1211                     {
1212                       ONE_MORE_BYTE (c5);
1213                       if (! UTF_8_EXTRA_OCTET_P (c5))
1214                         goto invalid_code;
1215                       if (UTF_8_5_OCTET_LEADING_P (c1))
1216                         {
1217                           c = (((c1 & 0x3) << 24) | ((c2 & 0x3F) << 18)
1218                                | ((c3 & 0x3F) << 12) | ((c4 & 0x3F) << 6)
1219                                | (c5 & 0x3F));
1220                           if ((c > MAX_CHAR) || (c < 0x200000))
1221                             goto invalid_code;
1222                         }
1223                       else
1224                         goto invalid_code;
1225                     }
1226                 }
1227             }
1228         }
1229
1230       *charbuf++ = c;
1231       continue;
1232
1233     invalid_code:
1234       src = src_base;
1235       consumed_chars = consumed_chars_base;
1236       ONE_MORE_BYTE (c);
1237       *charbuf++ = ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
1238       coding->errors++;
1239     }
1240
1241  no_more_source:
1242   coding->consumed_char += consumed_chars_base;
1243   coding->consumed = src_base - coding->source;
1244   coding->charbuf_used = charbuf - coding->charbuf;
1245 }
1246
1247
1248 static int
1249 encode_coding_utf_8 (coding)
1250      struct coding_system *coding;
1251 {
1252   int multibytep = coding->dst_multibyte;
1253   int *charbuf = coding->charbuf;
1254   int *charbuf_end = charbuf + coding->charbuf_used;
1255   unsigned char *dst = coding->destination + coding->produced;
1256   unsigned char *dst_end = coding->destination + coding->dst_bytes;
1257   int produced_chars = 0;
1258   int c;
1259
1260   if (multibytep)
1261     {
1262       int safe_room = MAX_MULTIBYTE_LENGTH * 2;
1263
1264       while (charbuf < charbuf_end)
1265         {
1266           unsigned char str[MAX_MULTIBYTE_LENGTH], *p, *pend = str;
1267
1268           ASSURE_DESTINATION (safe_room);
1269           c = *charbuf++;
1270           if (CHAR_BYTE8_P (c))
1271             {
1272               c = CHAR_TO_BYTE8 (c);
1273               EMIT_ONE_BYTE (c);
1274             }
1275           else
1276             {
1277               CHAR_STRING_ADVANCE (c, pend);
1278               for (p = str; p < pend; p++)
1279                 EMIT_ONE_BYTE (*p);
1280             }
1281         }
1282     }
1283   else
1284     {
1285       int safe_room = MAX_MULTIBYTE_LENGTH;
1286
1287       while (charbuf < charbuf_end)
1288         {
1289           ASSURE_DESTINATION (safe_room);
1290           c = *charbuf++;
1291           dst += CHAR_STRING (c, dst);
1292           produced_chars++;
1293         }
1294     }
1295   coding->result = CODING_RESULT_SUCCESS;
1296   coding->produced_char += produced_chars;
1297   coding->produced = dst - coding->destination;
1298   return 0;
1299 }
1300
1301
1302 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
1303    Check if a text is encoded in one of UTF-16 based coding systems.
1304    If it is, return 1, else return 0.  */
1305
1306 #define UTF_16_HIGH_SURROGATE_P(val) \
1307   (((val) & 0xFC00) == 0xD800)
1308
1309 #define UTF_16_LOW_SURROGATE_P(val) \
1310   (((val) & 0xFC00) == 0xDC00)
1311
1312 #define UTF_16_INVALID_P(val)   \
1313   (((val) == 0xFFFE)            \
1314    || ((val) == 0xFFFF)         \
1315    || UTF_16_LOW_SURROGATE_P (val))
1316
1317
1318 static int
1319 detect_coding_utf_16 (coding, detect_info)
1320      struct coding_system *coding;
1321      struct coding_detection_info *detect_info;
1322 {
1323   unsigned char *src = coding->source, *src_base = src;
1324   unsigned char *src_end = coding->source + coding->src_bytes;
1325   int multibytep = coding->src_multibyte;
1326   int consumed_chars = 0;
1327   int c1, c2;
1328
1329   detect_info->checked |= CATEGORY_MASK_UTF_16;
1330
1331   if (coding->mode & CODING_MODE_LAST_BLOCK
1332       && (coding->src_bytes & 1))
1333     {
1334       detect_info->rejected |= CATEGORY_MASK_UTF_16;
1335       return 0;
1336     }
1337   ONE_MORE_BYTE (c1);
1338   ONE_MORE_BYTE (c2);
1339
1340   if ((c1 == 0xFF) && (c2 == 0xFE))
1341     {
1342       detect_info->found |= (CATEGORY_MASK_UTF_16_LE
1343                              | CATEGORY_MASK_UTF_16_AUTO);
1344       detect_info->rejected |= CATEGORY_MASK_UTF_16_BE;
1345     }
1346   else if ((c1 == 0xFE) && (c2 == 0xFF))
1347     {
1348       detect_info->found |= (CATEGORY_MASK_UTF_16_BE
1349                              | CATEGORY_MASK_UTF_16_AUTO);
1350       detect_info->rejected |= CATEGORY_MASK_UTF_16_LE;
1351     }
1352  no_more_source:
1353   return 1;
1354 }
1355
1356 static void
1357 decode_coding_utf_16 (coding)
1358      struct coding_system *coding;
1359 {
1360   unsigned char *src = coding->source + coding->consumed;
1361   unsigned char *src_end = coding->source + coding->src_bytes;
1362   unsigned char *src_base;
1363   int *charbuf = coding->charbuf;
1364   int *charbuf_end = charbuf + coding->charbuf_size;
1365   int consumed_chars = 0, consumed_chars_base;
1366   int multibytep = coding->src_multibyte;
1367   enum utf_16_bom_type bom = CODING_UTF_16_BOM (coding);
1368   enum utf_16_endian_type endian = CODING_UTF_16_ENDIAN (coding);
1369   int surrogate = CODING_UTF_16_SURROGATE (coding);
1370   Lisp_Object attr, eol_type, charset_list;
1371
1372   CODING_GET_INFO (coding, attr, eol_type, charset_list);
1373
1374   if (bom == utf_16_with_bom)
1375     {
1376       int c, c1, c2;
1377
1378       src_base = src;
1379       ONE_MORE_BYTE (c1);
1380       ONE_MORE_BYTE (c2);
1381       c = (c1 << 8) | c2;
1382
1383       if (endian == utf_16_big_endian
1384           ? c != 0xFEFF : c != 0xFFFE)
1385         {
1386           /* The first two bytes are not BOM.  Treat them as bytes
1387              for a normal character.  */
1388           src = src_base;
1389           coding->errors++;
1390         }
1391       CODING_UTF_16_BOM (coding) = utf_16_without_bom;
1392     }
1393   else if (bom == utf_16_detect_bom)
1394     {
1395       /* We have already tried to detect BOM and failed in
1396          detect_coding.  */
1397       CODING_UTF_16_BOM (coding) = utf_16_without_bom;
1398     }
1399
1400   while (1)
1401     {
1402       int c, c1, c2;
1403
1404       src_base = src;
1405       consumed_chars_base = consumed_chars;
1406
1407       if (charbuf + 2 >= charbuf_end)
1408         break;
1409
1410       ONE_MORE_BYTE (c1);
1411       ONE_MORE_BYTE (c2);
1412       c = (endian == utf_16_big_endian
1413            ? ((c1 << 8) | c2) : ((c2 << 8) | c1));
1414       if (surrogate)
1415         {
1416           if (! UTF_16_LOW_SURROGATE_P (c))
1417             {
1418               if (endian == utf_16_big_endian)
1419                 c1 = surrogate >> 8, c2 = surrogate & 0xFF;
1420               else
1421                 c1 = surrogate & 0xFF, c2 = surrogate >> 8;
1422               *charbuf++ = c1;
1423               *charbuf++ = c2;
1424               coding->errors++;
1425               if (UTF_16_HIGH_SURROGATE_P (c))
1426                 CODING_UTF_16_SURROGATE (coding) = surrogate = c;
1427               else
1428                 *charbuf++ = c;
1429             }
1430           else
1431             {
1432               c = ((surrogate - 0xD800) << 10) | (c - 0xDC00);
1433               CODING_UTF_16_SURROGATE (coding) = surrogate = 0;
1434               *charbuf++ = c;
1435             }
1436         }
1437       else
1438         {
1439           if (UTF_16_HIGH_SURROGATE_P (c))
1440             CODING_UTF_16_SURROGATE (coding) = surrogate = c;
1441           else
1442             *charbuf++ = c;
1443         }
1444     }
1445
1446  no_more_source:
1447   coding->consumed_char += consumed_chars_base;
1448   coding->consumed = src_base - coding->source;
1449   coding->charbuf_used = charbuf - coding->charbuf;
1450 }
1451
1452 static int
1453 encode_coding_utf_16 (coding)
1454      struct coding_system *coding;
1455 {
1456   int multibytep = coding->dst_multibyte;
1457   int *charbuf = coding->charbuf;
1458   int *charbuf_end = charbuf + coding->charbuf_used;
1459   unsigned char *dst = coding->destination + coding->produced;
1460   unsigned char *dst_end = coding->destination + coding->dst_bytes;
1461   int safe_room = 8;
1462   enum utf_16_bom_type bom = CODING_UTF_16_BOM (coding);
1463   int big_endian = CODING_UTF_16_ENDIAN (coding) == utf_16_big_endian;
1464   int produced_chars = 0;
1465   Lisp_Object attrs, eol_type, charset_list;
1466   int c;
1467
1468   CODING_GET_INFO (coding, attrs, eol_type, charset_list);
1469
1470   if (bom != utf_16_without_bom)
1471     {
1472       ASSURE_DESTINATION (safe_room);
1473       if (big_endian)
1474         EMIT_TWO_BYTES (0xFE, 0xFF);
1475       else
1476         EMIT_TWO_BYTES (0xFF, 0xFE);
1477       CODING_UTF_16_BOM (coding) = utf_16_without_bom;
1478     }
1479
1480   while (charbuf < charbuf_end)
1481     {
1482       ASSURE_DESTINATION (safe_room);
1483       c = *charbuf++;
1484       if (c >= MAX_UNICODE_CHAR)
1485         c = coding->default_char;
1486
1487       if (c < 0x10000)
1488         {
1489           if (big_endian)
1490             EMIT_TWO_BYTES (c >> 8, c & 0xFF);
1491           else
1492             EMIT_TWO_BYTES (c & 0xFF, c >> 8);
1493         }
1494       else
1495         {
1496           int c1, c2;
1497
1498           c -= 0x10000;
1499           c1 = (c >> 10) + 0xD800;
1500           c2 = (c & 0x3FF) + 0xDC00;
1501           if (big_endian)
1502             EMIT_FOUR_BYTES (c1 >> 8, c1 & 0xFF, c2 >> 8, c2 & 0xFF);
1503           else
1504             EMIT_FOUR_BYTES (c1 & 0xFF, c1 >> 8, c2 & 0xFF, c2 >> 8);
1505         }
1506     }
1507   coding->result = CODING_RESULT_SUCCESS;
1508   coding->produced = dst - coding->destination;
1509   coding->produced_char += produced_chars;
1510   return 0;
1511 }
1512
1513 \f
1514 /*** 6. Old Emacs' internal format (emacs-mule) ***/
1515
1516 /* Emacs' internal format for representation of multiple character
1517    sets is a kind of multi-byte encoding, i.e. characters are
1518    represented by variable-length sequences of one-byte codes.
1519
1520    ASCII characters and control characters (e.g. `tab', `newline') are
1521    represented by one-byte sequences which are their ASCII codes, in
1522    the range 0x00 through 0x7F.
1523
1524    8-bit characters of the range 0x80..0x9F are represented by
1525    two-byte sequences of LEADING_CODE_8_BIT_CONTROL and (their 8-bit
1526    code + 0x20).
1527
1528    8-bit characters of the range 0xA0..0xFF are represented by
1529    one-byte sequences which are their 8-bit code.
1530
1531    The other characters are represented by a sequence of `base
1532    leading-code', optional `extended leading-code', and one or two
1533    `position-code's.  The length of the sequence is determined by the
1534    base leading-code.  Leading-code takes the range 0x81 through 0x9D,
1535    whereas extended leading-code and position-code take the range 0xA0
1536    through 0xFF.  See `charset.h' for more details about leading-code
1537    and position-code.
1538
1539    --- CODE RANGE of Emacs' internal format ---
1540    character set        range
1541    -------------        -----
1542    ascii                0x00..0x7F
1543    eight-bit-control    LEADING_CODE_8_BIT_CONTROL + 0xA0..0xBF
1544    eight-bit-graphic    0xA0..0xBF
1545    ELSE                 0x81..0x9D + [0xA0..0xFF]+
1546    ---------------------------------------------
1547
1548    As this is the internal character representation, the format is
1549    usually not used externally (i.e. in a file or in a data sent to a
1550    process).  But, it is possible to have a text externally in this
1551    format (i.e. by encoding by the coding system `emacs-mule').
1552
1553    In that case, a sequence of one-byte codes has a slightly different
1554    form.
1555
1556    At first, all characters in eight-bit-control are represented by
1557    one-byte sequences which are their 8-bit code.
1558
1559    Next, character composition data are represented by the byte
1560    sequence of the form: 0x80 METHOD BYTES CHARS COMPONENT ...,
1561    where,
1562         METHOD is 0xF0 plus one of composition method (enum
1563         composition_method),
1564
1565         BYTES is 0xA0 plus a byte length of this composition data,
1566
1567         CHARS is 0x20 plus a number of characters composed by this
1568         data,
1569
1570         COMPONENTs are characters of multibye form or composition
1571         rules encoded by two-byte of ASCII codes.
1572
1573    In addition, for backward compatibility, the following formats are
1574    also recognized as composition data on decoding.
1575
1576    0x80 MSEQ ...
1577    0x80 0xFF MSEQ RULE MSEQ RULE ... MSEQ
1578
1579    Here,
1580         MSEQ is a multibyte form but in these special format:
1581           ASCII: 0xA0 ASCII_CODE+0x80,
1582           other: LEADING_CODE+0x20 FOLLOWING-BYTE ...,
1583         RULE is a one byte code of the range 0xA0..0xF0 that
1584         represents a composition rule.
1585   */
1586
1587 char emacs_mule_bytes[256];
1588
1589 int
1590 emacs_mule_char (coding, src, nbytes, nchars, id)
1591      struct coding_system *coding;
1592      unsigned char *src;
1593      int *nbytes, *nchars, *id;
1594 {
1595   unsigned char *src_end = coding->source + coding->src_bytes;
1596   int multibytep = coding->src_multibyte;
1597   unsigned char *src_base = src;
1598   struct charset *charset;
1599   unsigned code;
1600   int c;
1601   int consumed_chars = 0;
1602
1603   ONE_MORE_BYTE (c);
1604   switch (emacs_mule_bytes[c])
1605     {
1606     case 2:
1607       if (! (charset = emacs_mule_charset[c]))
1608         goto invalid_code;
1609       ONE_MORE_BYTE (c);
1610       code = c & 0x7F;
1611       break;
1612
1613     case 3:
1614       if (c == EMACS_MULE_LEADING_CODE_PRIVATE_11
1615           || c == EMACS_MULE_LEADING_CODE_PRIVATE_12)
1616         {
1617           ONE_MORE_BYTE (c);
1618           if (! (charset = emacs_mule_charset[c]))
1619             goto invalid_code;
1620           ONE_MORE_BYTE (c);
1621           code = c & 0x7F;
1622         }
1623       else
1624         {
1625           if (! (charset = emacs_mule_charset[c]))
1626             goto invalid_code;
1627           ONE_MORE_BYTE (c);
1628           code = (c & 0x7F) << 8;
1629           ONE_MORE_BYTE (c);
1630           code |= c & 0x7F;
1631         }
1632       break;
1633
1634     case 4:
1635       ONE_MORE_BYTE (c);
1636       if (! (charset = emacs_mule_charset[c]))
1637         goto invalid_code;
1638       ONE_MORE_BYTE (c);
1639       code = (c & 0x7F) << 8;
1640       ONE_MORE_BYTE (c);
1641       code |= c & 0x7F;
1642       break;
1643
1644     case 1:
1645       code = c;
1646       charset = CHARSET_FROM_ID (ASCII_BYTE_P (code)
1647                                  ? charset_ascii : charset_eight_bit);
1648       break;
1649
1650     default:
1651       abort ();
1652     }
1653   c = DECODE_CHAR (charset, code);
1654   if (c < 0)
1655     goto invalid_code;
1656   *nbytes = src - src_base;
1657   *nchars = consumed_chars;
1658   if (id)
1659     *id = charset->id;
1660   return c;
1661
1662  no_more_source:
1663   return -2;
1664
1665  invalid_code:
1666   return -1;
1667 }
1668
1669
1670 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
1671    Check if a text is encoded in `emacs-mule'.  If it is, return 1,
1672    else return 0.  */
1673
1674 static int
1675 detect_coding_emacs_mule (coding, detect_info)
1676      struct coding_system *coding;
1677      struct coding_detection_info *detect_info;
1678 {
1679   unsigned char *src = coding->source, *src_base = src;
1680   unsigned char *src_end = coding->source + coding->src_bytes;
1681   int multibytep = coding->src_multibyte;
1682   int consumed_chars = 0;
1683   int c;
1684   int found = 0;
1685   int incomplete;
1686
1687   detect_info->checked |= CATEGORY_MASK_EMACS_MULE;
1688   /* A coding system of this category is always ASCII compatible.  */
1689   src += coding->head_ascii;
1690
1691   while (1)
1692     {
1693       incomplete = 0;
1694       ONE_MORE_BYTE (c);
1695       incomplete = 1;
1696
1697       if (c == 0x80)
1698         {
1699           /* Perhaps the start of composite character.  We simple skip
1700              it because analyzing it is too heavy for detecting.  But,
1701              at least, we check that the composite character
1702              constitues of more than 4 bytes.  */
1703           unsigned char *src_base;
1704
1705         repeat:
1706           src_base = src;
1707           do
1708             {
1709               ONE_MORE_BYTE (c);
1710             }
1711           while (c >= 0xA0);
1712
1713           if (src - src_base <= 4)
1714             break;
1715           found = CATEGORY_MASK_EMACS_MULE;
1716           if (c == 0x80)
1717             goto repeat;
1718         }
1719
1720       if (c < 0x80)
1721         {
1722           if (c < 0x20
1723               && (c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO))
1724             break;
1725         }
1726       else
1727         {
1728           unsigned char *src_base = src - 1;
1729
1730           do
1731             {
1732               ONE_MORE_BYTE (c);
1733             }
1734           while (c >= 0xA0);
1735           if (src - src_base != emacs_mule_bytes[*src_base])
1736             break;
1737           found = CATEGORY_MASK_EMACS_MULE;
1738         }
1739     }
1740   detect_info->rejected |= CATEGORY_MASK_EMACS_MULE;
1741   return 0;
1742
1743  no_more_source:
1744   if (incomplete && coding->mode & CODING_MODE_LAST_BLOCK)
1745     {
1746       detect_info->rejected |= CATEGORY_MASK_EMACS_MULE;
1747       return 0;
1748     }
1749   detect_info->found |= found;
1750   return 1;
1751 }
1752
1753
1754 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".  */
1755
1756 /* Decode a character represented as a component of composition
1757    sequence of Emacs 20/21 style at SRC.  Set C to that character and
1758    update SRC to the head of next character (or an encoded composition
1759    rule).  If SRC doesn't points a composition component, set C to -1.
1760    If SRC points an invalid byte sequence, global exit by a return
1761    value 0.  */
1762
1763 #define DECODE_EMACS_MULE_COMPOSITION_CHAR(buf)                 \
1764   if (1)                                                        \
1765     {                                                           \
1766       int c;                                                    \
1767       int nbytes, nchars;                                       \
1768                                                                 \
1769       if (src == src_end)                                       \
1770         break;                                                  \
1771       c = emacs_mule_char (coding, src, &nbytes, &nchars, NULL);\
1772       if (c < 0)                                                \
1773         {                                                       \
1774           if (c == -2)                                          \
1775             break;                                              \
1776           goto invalid_code;                                    \
1777         }                                                       \
1778       *buf++ = c;                                               \
1779       src += nbytes;                                            \
1780       consumed_chars += nchars;                                 \
1781     }                                                           \
1782   else
1783
1784
1785 /* Decode a composition rule represented as a component of composition
1786    sequence of Emacs 20 style at SRC.  Store the decoded rule in *BUF,
1787    and increment BUF.  If SRC points an invalid byte sequence, set C
1788    to -1.  */
1789
1790 #define DECODE_EMACS_MULE_COMPOSITION_RULE_20(buf)      \
1791   do {                                                  \
1792     int c, gref, nref;                                  \
1793                                                         \
1794     if (src >= src_end)                                 \
1795       goto invalid_code;                                \
1796     ONE_MORE_BYTE_NO_CHECK (c);                         \
1797     c -= 0x20;                                          \
1798     if (c < 0 || c >= 81)                               \
1799       goto invalid_code;                                \
1800                                                         \
1801     gref = c / 9, nref = c % 9;                         \
1802     *buf++ = COMPOSITION_ENCODE_RULE (gref, nref);      \
1803   } while (0)
1804
1805
1806 /* Decode a composition rule represented as a component of composition
1807    sequence of Emacs 21 style at SRC.  Store the decoded rule in *BUF,
1808    and increment BUF.  If SRC points an invalid byte sequence, set C
1809    to -1.  */
1810
1811 #define DECODE_EMACS_MULE_COMPOSITION_RULE_21(buf)      \
1812   do {                                                  \
1813     int gref, nref;                                     \
1814                                                         \
1815     if (src + 1>= src_end)                              \
1816       goto invalid_code;                                \
1817     ONE_MORE_BYTE_NO_CHECK (gref);                      \
1818     gref -= 0x20;                                       \
1819     ONE_MORE_BYTE_NO_CHECK (nref);                      \
1820     nref -= 0x20;                                       \
1821     if (gref < 0 || gref >= 81                          \
1822         || nref < 0 || nref >= 81)                      \
1823       goto invalid_code;                                \
1824     *buf++ = COMPOSITION_ENCODE_RULE (gref, nref);      \
1825   } while (0)
1826
1827
1828 #define DECODE_EMACS_MULE_21_COMPOSITION(c)                             \
1829   do {                                                                  \
1830     /* Emacs 21 style format.  The first three bytes at SRC are         \
1831        (METHOD - 0xF2), (BYTES - 0xA0), (CHARS - 0xA0), where BYTES is  \
1832        the byte length of this composition information, CHARS is the    \
1833        number of characters composed by this composition.  */           \
1834     enum composition_method method = c - 0xF2;                          \
1835     int *charbuf_base = charbuf;                                        \
1836     int from, to;                                                       \
1837     int consumed_chars_limit;                                           \
1838     int nbytes, nchars;                                                 \
1839                                                                         \
1840     ONE_MORE_BYTE (c);                                                  \
1841     nbytes = c - 0xA0;                                                  \
1842     if (nbytes < 3)                                                     \
1843       goto invalid_code;                                                \
1844     ONE_MORE_BYTE (c);                                                  \
1845     nchars = c - 0xA0;                                                  \
1846     from = coding->produced + char_offset;                              \
1847     to = from + nchars;                                                 \
1848     ADD_COMPOSITION_DATA (charbuf, from, to, method);                   \
1849     consumed_chars_limit = consumed_chars_base + nbytes;                \
1850     if (method != COMPOSITION_RELATIVE)                                 \
1851       {                                                                 \
1852         int i = 0;                                                      \
1853         while (consumed_chars < consumed_chars_limit)                   \
1854           {                                                             \
1855             if (i % 2 && method != COMPOSITION_WITH_ALTCHARS)           \
1856               DECODE_EMACS_MULE_COMPOSITION_RULE_21 (charbuf);          \
1857             else                                                        \
1858               DECODE_EMACS_MULE_COMPOSITION_CHAR (charbuf);             \
1859             i++;                                                        \
1860           }                                                             \
1861         if (consumed_chars < consumed_chars_limit)                      \
1862           goto invalid_code;                                            \
1863         charbuf_base[0] -= i;                                           \
1864       }                                                                 \
1865   } while (0)
1866
1867
1868 #define DECODE_EMACS_MULE_20_RELATIVE_COMPOSITION(c)            \
1869   do {                                                          \
1870     /* Emacs 20 style format for relative composition.  */      \
1871     /* Store multibyte form of characters to be composed.  */   \
1872     enum composition_method method = COMPOSITION_RELATIVE;      \
1873     int components[MAX_COMPOSITION_COMPONENTS * 2 - 1];         \
1874     int *buf = components;                                      \
1875     int i, j;                                                   \
1876     int from, to;                                               \
1877                                                                 \
1878     src = src_base;                                             \
1879     ONE_MORE_BYTE (c);          /* skip 0x80 */                 \
1880     for (i = 0; i < MAX_COMPOSITION_COMPONENTS; i++)            \
1881       DECODE_EMACS_MULE_COMPOSITION_CHAR (buf);                 \
1882     if (i < 2)                                                  \
1883       goto invalid_code;                                        \
1884     from = coding->produced_char + char_offset;                 \
1885     to = from + i;                                              \
1886     ADD_COMPOSITION_DATA (charbuf, from, to, method);           \
1887     for (j = 0; j < i; j++)                                     \
1888       *charbuf++ = components[j];                               \
1889   } while (0)
1890
1891
1892 #define DECODE_EMACS_MULE_20_RULEBASE_COMPOSITION(c)            \
1893   do {                                                          \
1894     /* Emacs 20 style format for rule-base composition.  */     \
1895     /* Store multibyte form of characters to be composed.  */   \
1896     enum composition_method method = COMPOSITION_WITH_RULE;     \
1897     int components[MAX_COMPOSITION_COMPONENTS * 2 - 1];         \
1898     int *buf = components;                                      \
1899     int i, j;                                                   \
1900     int from, to;                                               \
1901                                                                 \
1902     DECODE_EMACS_MULE_COMPOSITION_CHAR (buf);                   \
1903     for (i = 0; i < MAX_COMPOSITION_COMPONENTS; i++)            \
1904       {                                                         \
1905         DECODE_EMACS_MULE_COMPOSITION_RULE_20 (buf);            \
1906         DECODE_EMACS_MULE_COMPOSITION_CHAR (buf);               \
1907       }                                                         \
1908     if (i < 1 || (buf - components) % 2 == 0)                   \
1909       goto invalid_code;                                        \
1910     if (charbuf + i + (i / 2) + 1 < charbuf_end)                \
1911       goto no_more_source;                                      \
1912     from = coding->produced_char + char_offset;                 \
1913     to = from + i;                                              \
1914     ADD_COMPOSITION_DATA (buf, from, to, method);               \
1915     for (j = 0; j < i; j++)                                     \
1916       *charbuf++ = components[j];                               \
1917     for (j = 0; j < i; j += 2)                                  \
1918       *charbuf++ = components[j];                               \
1919   } while (0)
1920
1921
1922 static void
1923 decode_coding_emacs_mule (coding)
1924      struct coding_system *coding;
1925 {
1926   unsigned char *src = coding->source + coding->consumed;
1927   unsigned char *src_end = coding->source + coding->src_bytes;
1928   unsigned char *src_base;
1929   int *charbuf = coding->charbuf;
1930   int *charbuf_end = charbuf + coding->charbuf_size - MAX_ANNOTATION_LENGTH;
1931   int consumed_chars = 0, consumed_chars_base;
1932   int multibytep = coding->src_multibyte;
1933   Lisp_Object attrs, eol_type, charset_list;
1934   int char_offset = coding->produced_char;
1935   int last_offset = char_offset;
1936   int last_id = charset_ascii;
1937
1938   CODING_GET_INFO (coding, attrs, eol_type, charset_list);
1939
1940   while (1)
1941     {
1942       int c;
1943
1944       src_base = src;
1945       consumed_chars_base = consumed_chars;
1946
1947       if (charbuf >= charbuf_end)
1948         break;
1949
1950       ONE_MORE_BYTE (c);
1951
1952       if (c < 0x80)
1953         {
1954           if (c == '\r')
1955             {
1956               if (EQ (eol_type, Qdos))
1957                 {
1958                   if (src == src_end)
1959                     {
1960                       coding->result = CODING_RESULT_INSUFFICIENT_SRC;
1961                       goto no_more_source;
1962                     }
1963                   if (*src == '\n')
1964                     ONE_MORE_BYTE (c);
1965                 }
1966               else if (EQ (eol_type, Qmac))
1967                 c = '\n';
1968             }
1969           *charbuf++ = c;
1970           char_offset++;
1971         }
1972       else if (c == 0x80)
1973         {
1974           ONE_MORE_BYTE (c);
1975           if (c - 0xF2 >= COMPOSITION_RELATIVE
1976               && c - 0xF2 <= COMPOSITION_WITH_RULE_ALTCHARS)
1977             DECODE_EMACS_MULE_21_COMPOSITION (c);
1978           else if (c < 0xC0)
1979             DECODE_EMACS_MULE_20_RELATIVE_COMPOSITION (c);
1980           else if (c == 0xFF)
1981             DECODE_EMACS_MULE_20_RULEBASE_COMPOSITION (c);
1982           else
1983             goto invalid_code;
1984         }
1985       else if (c < 0xA0 && emacs_mule_bytes[c] > 1)
1986         {
1987           int nbytes, nchars;
1988           int id;
1989
1990           src = src_base;
1991           consumed_chars = consumed_chars_base;
1992           c = emacs_mule_char (coding, src, &nbytes, &nchars, &id);
1993           if (c < 0)
1994             {
1995               if (c == -2)
1996                 break;
1997               goto invalid_code;
1998             }
1999           if (last_id != id)
2000             {
2001               if (last_id != charset_ascii)
2002                 ADD_CHARSET_DATA (charbuf, last_offset, char_offset, last_id);
2003               last_id = id;
2004               last_offset = char_offset;
2005             }
2006           *charbuf++ = c;
2007           src += nbytes;
2008           consumed_chars += nchars;
2009           char_offset++;
2010         }
2011       continue;
2012
2013     invalid_code:
2014       src = src_base;
2015       consumed_chars = consumed_chars_base;
2016       ONE_MORE_BYTE (c);
2017       *charbuf++ = ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
2018       char_offset++;
2019       coding->errors++;
2020     }
2021
2022  no_more_source:
2023   if (last_id != charset_ascii)
2024     ADD_CHARSET_DATA (charbuf, last_offset, char_offset, last_id);
2025   coding->consumed_char += consumed_chars_base;
2026   coding->consumed = src_base - coding->source;
2027   coding->charbuf_used = charbuf - coding->charbuf;
2028 }
2029
2030
2031 #define EMACS_MULE_LEADING_CODES(id, codes)     \
2032   do {                                          \
2033     if (id < 0xA0)                              \
2034       codes[0] = id, codes[1] = 0;              \
2035     else if (id < 0xE0)                         \
2036       codes[0] = 0x9A, codes[1] = id;           \
2037     else if (id < 0xF0)                         \
2038       codes[0] = 0x9B, codes[1] = id;           \
2039     else if (id < 0xF5)                         \
2040       codes[0] = 0x9C, codes[1] = id;           \
2041     else                                        \
2042       codes[0] = 0x9D, codes[1] = id;           \
2043   } while (0);
2044
2045
2046 static int
2047 encode_coding_emacs_mule (coding)
2048      struct coding_system *coding;
2049 {
2050   int multibytep = coding->dst_multibyte;
2051   int *charbuf = coding->charbuf;
2052   int *charbuf_end = charbuf + coding->charbuf_used;
2053   unsigned char *dst = coding->destination + coding->produced;
2054   unsigned char *dst_end = coding->destination + coding->dst_bytes;
2055   int safe_room = 8;
2056   int produced_chars = 0;
2057   Lisp_Object attrs, eol_type, charset_list;
2058   int c;
2059   int preferred_charset_id = -1;
2060
2061   CODING_GET_INFO (coding, attrs, eol_type, charset_list);
2062
2063   while (charbuf < charbuf_end)
2064     {
2065       ASSURE_DESTINATION (safe_room);
2066       c = *charbuf++;
2067
2068       if (c < 0)
2069         {
2070           /* Handle an annotation.  */
2071           switch (*charbuf)
2072             {
2073             case CODING_ANNOTATE_COMPOSITION_MASK:
2074               /* Not yet implemented.  */
2075               break;
2076             case CODING_ANNOTATE_CHARSET_MASK:
2077               preferred_charset_id = charbuf[3];
2078               if (preferred_charset_id >= 0
2079                   && NILP (Fmemq (make_number (preferred_charset_id),
2080                                   charset_list)))
2081                 preferred_charset_id = -1;
2082               break;
2083             default:
2084               abort ();
2085             }
2086           charbuf += -c - 1;
2087           continue;
2088         }
2089
2090       if (ASCII_CHAR_P (c))
2091         EMIT_ONE_ASCII_BYTE (c);
2092       else if (CHAR_BYTE8_P (c))
2093         {
2094           c = CHAR_TO_BYTE8 (c);
2095           EMIT_ONE_BYTE (c);
2096         }
2097       else
2098         {
2099           struct charset *charset;
2100           unsigned code;
2101           int dimension;
2102           int emacs_mule_id;
2103           unsigned char leading_codes[2];
2104
2105           if (preferred_charset_id >= 0)
2106             {
2107               charset = CHARSET_FROM_ID (preferred_charset_id);
2108               if (! CHAR_CHARSET_P (c, charset))
2109                 charset = char_charset (c, charset_list, NULL);
2110             }
2111           else
2112             charset = char_charset (c, charset_list, &code);
2113           if (! charset)
2114             {
2115               c = coding->default_char;
2116               if (ASCII_CHAR_P (c))
2117                 {
2118                   EMIT_ONE_ASCII_BYTE (c);
2119                   continue;
2120                 }
2121               charset = char_charset (c, charset_list, &code);
2122             }
2123           dimension = CHARSET_DIMENSION (charset);
2124           emacs_mule_id = CHARSET_EMACS_MULE_ID (charset);
2125           EMACS_MULE_LEADING_CODES (emacs_mule_id, leading_codes);
2126           EMIT_ONE_BYTE (leading_codes[0]);
2127           if (leading_codes[1])
2128             EMIT_ONE_BYTE (leading_codes[1]);
2129           if (dimension == 1)
2130             EMIT_ONE_BYTE (code);
2131           else
2132             {
2133               EMIT_ONE_BYTE (code >> 8);
2134               EMIT_ONE_BYTE (code & 0xFF);
2135             }
2136         }
2137     }
2138   coding->result = CODING_RESULT_SUCCESS;
2139   coding->produced_char += produced_chars;
2140   coding->produced = dst - coding->destination;
2141   return 0;
2142 }
2143
2144 \f
2145 /*** 7. ISO2022 handlers ***/
2146
2147 /* The following note describes the coding system ISO2022 briefly.
2148    Since the intention of this note is to help understand the
2149    functions in this file, some parts are NOT ACCURATE or are OVERLY
2150    SIMPLIFIED.  For thorough understanding, please refer to the
2151    original document of ISO2022.  This is equivalent to the standard
2152    ECMA-35, obtainable from <URL:http://www.ecma.ch/> (*).
2153
2154    ISO2022 provides many mechanisms to encode several character sets
2155    in 7-bit and 8-bit environments.  For 7-bit environments, all text
2156    is encoded using bytes less than 128.  This may make the encoded
2157    text a little bit longer, but the text passes more easily through
2158    several types of gateway, some of which strip off the MSB (Most
2159    Significant Bit).
2160
2161    There are two kinds of character sets: control character sets and
2162    graphic character sets.  The former contain control characters such
2163    as `newline' and `escape' to provide control functions (control
2164    functions are also provided by escape sequences).  The latter
2165    contain graphic characters such as 'A' and '-'.  Emacs recognizes
2166    two control character sets and many graphic character sets.
2167
2168    Graphic character sets are classified into one of the following
2169    four classes, according to the number of bytes (DIMENSION) and
2170    number of characters in one dimension (CHARS) of the set:
2171    - DIMENSION1_CHARS94
2172    - DIMENSION1_CHARS96
2173    - DIMENSION2_CHARS94
2174    - DIMENSION2_CHARS96
2175
2176    In addition, each character set is assigned an identification tag,
2177    unique for each set, called the "final character" (denoted as <F>
2178    hereafter).  The <F> of each character set is decided by ECMA(*)
2179    when it is registered in ISO.  The code range of <F> is 0x30..0x7F
2180    (0x30..0x3F are for private use only).
2181
2182    Note (*): ECMA = European Computer Manufacturers Association
2183
2184    Here are examples of graphic character sets [NAME(<F>)]:
2185         o DIMENSION1_CHARS94 -- ASCII('B'), right-half-of-JISX0201('I'), ...
2186         o DIMENSION1_CHARS96 -- right-half-of-ISO8859-1('A'), ...
2187         o DIMENSION2_CHARS94 -- GB2312('A'), JISX0208('B'), ...
2188         o DIMENSION2_CHARS96 -- none for the moment
2189
2190    A code area (1 byte=8 bits) is divided into 4 areas, C0, GL, C1, and GR.
2191         C0 [0x00..0x1F] -- control character plane 0
2192         GL [0x20..0x7F] -- graphic character plane 0
2193         C1 [0x80..0x9F] -- control character plane 1
2194         GR [0xA0..0xFF] -- graphic character plane 1
2195
2196    A control character set is directly designated and invoked to C0 or
2197    C1 by an escape sequence.  The most common case is that:
2198    - ISO646's  control character set is designated/invoked to C0, and
2199    - ISO6429's control character set is designated/invoked to C1,
2200    and usually these designations/invocations are omitted in encoded
2201    text.  In a 7-bit environment, only C0 can be used, and a control
2202    character for C1 is encoded by an appropriate escape sequence to
2203    fit into the environment.  All control characters for C1 are
2204    defined to have corresponding escape sequences.
2205
2206    A graphic character set is at first designated to one of four
2207    graphic registers (G0 through G3), then these graphic registers are
2208    invoked to GL or GR.  These designations and invocations can be
2209    done independently.  The most common case is that G0 is invoked to
2210    GL, G1 is invoked to GR, and ASCII is designated to G0.  Usually
2211    these invocations and designations are omitted in encoded text.
2212    In a 7-bit environment, only GL can be used.
2213
2214    When a graphic character set of CHARS94 is invoked to GL, codes
2215    0x20 and 0x7F of the GL area work as control characters SPACE and
2216    DEL respectively, and codes 0xA0 and 0xFF of the GR area should not
2217    be used.
2218
2219    There are two ways of invocation: locking-shift and single-shift.
2220    With locking-shift, the invocation lasts until the next different
2221    invocation, whereas with single-shift, the invocation affects the
2222    following character only and doesn't affect the locking-shift
2223    state.  Invocations are done by the following control characters or
2224    escape sequences:
2225
2226    ----------------------------------------------------------------------
2227    abbrev  function                  cntrl escape seq   description
2228    ----------------------------------------------------------------------
2229    SI/LS0  (shift-in)                0x0F  none         invoke G0 into GL
2230    SO/LS1  (shift-out)               0x0E  none         invoke G1 into GL
2231    LS2     (locking-shift-2)         none  ESC 'n'      invoke G2 into GL
2232    LS3     (locking-shift-3)         none  ESC 'o'      invoke G3 into GL
2233    LS1R    (locking-shift-1 right)   none  ESC '~'      invoke G1 into GR (*)
2234    LS2R    (locking-shift-2 right)   none  ESC '}'      invoke G2 into GR (*)
2235    LS3R    (locking-shift 3 right)   none  ESC '|'      invoke G3 into GR (*)
2236    SS2     (single-shift-2)          0x8E  ESC 'N'      invoke G2 for one char
2237    SS3     (single-shift-3)          0x8F  ESC 'O'      invoke G3 for one char
2238    ----------------------------------------------------------------------
2239    (*) These are not used by any known coding system.
2240
2241    Control characters for these functions are defined by macros
2242    ISO_CODE_XXX in `coding.h'.
2243
2244    Designations are done by the following escape sequences:
2245    ----------------------------------------------------------------------
2246    escape sequence      description
2247    ----------------------------------------------------------------------
2248    ESC '(' <F>          designate DIMENSION1_CHARS94<F> to G0
2249    ESC ')' <F>          designate DIMENSION1_CHARS94<F> to G1
2250    ESC '*' <F>          designate DIMENSION1_CHARS94<F> to G2
2251    ESC '+' <F>          designate DIMENSION1_CHARS94<F> to G3
2252    ESC ',' <F>          designate DIMENSION1_CHARS96<F> to G0 (*)
2253    ESC '-' <F>          designate DIMENSION1_CHARS96<F> to G1
2254    ESC '.' <F>          designate DIMENSION1_CHARS96<F> to G2
2255    ESC '/' <F>          designate DIMENSION1_CHARS96<F> to G3
2256    ESC '$' '(' <F>      designate DIMENSION2_CHARS94<F> to G0 (**)
2257    ESC '$' ')' <F>      designate DIMENSION2_CHARS94<F> to G1
2258    ESC '$' '*' <F>      designate DIMENSION2_CHARS94<F> to G2
2259    ESC '$' '+' <F>      designate DIMENSION2_CHARS94<F> to G3
2260    ESC '$' ',' <F>      designate DIMENSION2_CHARS96<F> to G0 (*)
2261    ESC '$' '-' <F>      designate DIMENSION2_CHARS96<F> to G1
2262    ESC '$' '.' <F>      designate DIMENSION2_CHARS96<F> to G2
2263    ESC '$' '/' <F>      designate DIMENSION2_CHARS96<F> to G3
2264    ----------------------------------------------------------------------
2265
2266    In this list, "DIMENSION1_CHARS94<F>" means a graphic character set
2267    of dimension 1, chars 94, and final character <F>, etc...
2268
2269    Note (*): Although these designations are not allowed in ISO2022,
2270    Emacs accepts them on decoding, and produces them on encoding
2271    CHARS96 character sets in a coding system which is characterized as
2272    7-bit environment, non-locking-shift, and non-single-shift.
2273
2274    Note (**): If <F> is '@', 'A', or 'B', the intermediate character
2275    '(' must be omitted.  We refer to this as "short-form" hereafter.
2276
2277    Now you may notice that there are a lot of ways of encoding the
2278    same multilingual text in ISO2022.  Actually, there exist many
2279    coding systems such as Compound Text (used in X11's inter client
2280    communication, ISO-2022-JP (used in Japanese Internet), ISO-2022-KR
2281    (used in Korean Internet), EUC (Extended UNIX Code, used in Asian
2282    localized platforms), and all of these are variants of ISO2022.
2283
2284    In addition to the above, Emacs handles two more kinds of escape
2285    sequences: ISO6429's direction specification and Emacs' private
2286    sequence for specifying character composition.
2287
2288    ISO6429's direction specification takes the following form:
2289         o CSI ']'      -- end of the current direction
2290         o CSI '0' ']'  -- end of the current direction
2291         o CSI '1' ']'  -- start of left-to-right text
2292         o CSI '2' ']'  -- start of right-to-left text
2293    The control character CSI (0x9B: control sequence introducer) is
2294    abbreviated to the escape sequence ESC '[' in a 7-bit environment.
2295
2296    Character composition specification takes the following form:
2297         o ESC '0' -- start relative composition
2298         o ESC '1' -- end composition
2299         o ESC '2' -- start rule-base composition (*)
2300         o ESC '3' -- start relative composition with alternate chars  (**)
2301         o ESC '4' -- start rule-base composition with alternate chars  (**)
2302   Since these are not standard escape sequences of any ISO standard,
2303   the use of them with these meanings is restricted to Emacs only.
2304
2305   (*) This form is used only in Emacs 20.7 and older versions,
2306   but newer versions can safely decode it.
2307   (**) This form is used only in Emacs 21.1 and newer versions,
2308   and older versions can't decode it.
2309
2310   Here's a list of example usages of these composition escape
2311   sequences (categorized by `enum composition_method').
2312
2313   COMPOSITION_RELATIVE:
2314         ESC 0 CHAR [ CHAR ] ESC 1
2315   COMPOSITION_WITH_RULE:
2316         ESC 2 CHAR [ RULE CHAR ] ESC 1
2317   COMPOSITION_WITH_ALTCHARS:
2318         ESC 3 ALTCHAR [ ALTCHAR ] ESC 0 CHAR [ CHAR ] ESC 1
2319   COMPOSITION_WITH_RULE_ALTCHARS:
2320         ESC 4 ALTCHAR [ RULE ALTCHAR ] ESC 0 CHAR [ CHAR ] ESC 1 */
2321
2322 enum iso_code_class_type iso_code_class[256];
2323
2324 #define SAFE_CHARSET_P(coding, id)      \
2325   ((id) <= (coding)->max_charset_id     \
2326    && (coding)->safe_charsets[id] >= 0)
2327
2328
2329 #define SHIFT_OUT_OK(category)  \
2330   (CODING_ISO_INITIAL (&coding_categories[category], 1) >= 0)
2331
2332 static void
2333 setup_iso_safe_charsets (attrs)
2334      Lisp_Object attrs;
2335 {
2336   Lisp_Object charset_list, safe_charsets;
2337   Lisp_Object request;
2338   Lisp_Object reg_usage;
2339   Lisp_Object tail;
2340   int reg94, reg96;
2341   int flags = XINT (AREF (attrs, coding_attr_iso_flags));
2342   int max_charset_id;
2343
2344   charset_list = CODING_ATTR_CHARSET_LIST (attrs);
2345   if ((flags & CODING_ISO_FLAG_FULL_SUPPORT)
2346       && ! EQ (charset_list, Viso_2022_charset_list))
2347     {
2348       CODING_ATTR_CHARSET_LIST (attrs)
2349         = charset_list = Viso_2022_charset_list;
2350       ASET (attrs, coding_attr_safe_charsets, Qnil);
2351     }
2352
2353   if (STRINGP (AREF (attrs, coding_attr_safe_charsets)))
2354     return;
2355
2356   max_charset_id = 0;
2357   for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
2358     {
2359       int id = XINT (XCAR (tail));
2360       if (max_charset_id < id)
2361         max_charset_id = id;
2362     }
2363
2364   safe_charsets = Fmake_string (make_number (max_charset_id + 1),
2365                                 make_number (255));
2366   request = AREF (attrs, coding_attr_iso_request);
2367   reg_usage = AREF (attrs, coding_attr_iso_usage);
2368   reg94 = XINT (XCAR (reg_usage));
2369   reg96 = XINT (XCDR (reg_usage));
2370
2371   for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
2372     {
2373       Lisp_Object id;
2374       Lisp_Object reg;
2375       struct charset *charset;
2376
2377       id = XCAR (tail);
2378       charset = CHARSET_FROM_ID (XINT (id));
2379       reg = Fcdr (Fassq (id, request));
2380       if (! NILP (reg))
2381         XSTRING (safe_charsets)->data[XINT (id)] = XINT (reg);
2382       else if (charset->iso_chars_96)
2383         {
2384           if (reg96 < 4)
2385             XSTRING (safe_charsets)->data[XINT (id)] = reg96;
2386         }
2387       else
2388         {
2389           if (reg94 < 4)
2390             XSTRING (safe_charsets)->data[XINT (id)] = reg94;
2391         }
2392     }
2393   ASET (attrs, coding_attr_safe_charsets, safe_charsets);
2394 }
2395
2396
2397 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
2398    Check if a text is encoded in one of ISO-2022 based codig systems.
2399    If it is, return 1, else return 0.  */
2400
2401 static int
2402 detect_coding_iso_2022 (coding, detect_info)
2403      struct coding_system *coding;
2404      struct coding_detection_info *detect_info;
2405 {
2406   unsigned char *src = coding->source, *src_base = src;
2407   unsigned char *src_end = coding->source + coding->src_bytes;
2408   int multibytep = coding->src_multibyte;
2409   int single_shifting = 0;
2410   int id;
2411   int c, c1;
2412   int consumed_chars = 0;
2413   int i;
2414   int rejected = 0;
2415   int found = 0;
2416
2417   detect_info->checked |= CATEGORY_MASK_ISO;
2418
2419   for (i = coding_category_iso_7; i <= coding_category_iso_8_else; i++)
2420     {
2421       struct coding_system *this = &(coding_categories[i]);
2422       Lisp_Object attrs, val;
2423
2424       attrs = CODING_ID_ATTRS (this->id);
2425       if (CODING_ISO_FLAGS (this) & CODING_ISO_FLAG_FULL_SUPPORT
2426           && ! EQ (CODING_ATTR_SAFE_CHARSETS (attrs), Viso_2022_charset_list))
2427         setup_iso_safe_charsets (attrs);
2428       val = CODING_ATTR_SAFE_CHARSETS (attrs);
2429       this->max_charset_id = XSTRING (val)->size - 1;
2430       this->safe_charsets = (char *) XSTRING (val)->data;
2431     }
2432
2433   /* A coding system of this category is always ASCII compatible.  */
2434   src += coding->head_ascii;
2435
2436   while (rejected != CATEGORY_MASK_ISO)
2437     {
2438       ONE_MORE_BYTE (c);
2439       switch (c)
2440         {
2441         case ISO_CODE_ESC:
2442           if (inhibit_iso_escape_detection)
2443             break;
2444           single_shifting = 0;
2445           ONE_MORE_BYTE (c);
2446           if (c >= '(' && c <= '/')
2447             {
2448               /* Designation sequence for a charset of dimension 1.  */
2449               ONE_MORE_BYTE (c1);
2450               if (c1 < ' ' || c1 >= 0x80
2451                   || (id = iso_charset_table[0][c >= ','][c1]) < 0)
2452                 /* Invalid designation sequence.  Just ignore.  */
2453                 break;
2454             }
2455           else if (c == '$')
2456             {
2457               /* Designation sequence for a charset of dimension 2.  */
2458               ONE_MORE_BYTE (c);
2459               if (c >= '@' && c <= 'B')
2460                 /* Designation for JISX0208.1978, GB2312, or JISX0208.  */
2461                 id = iso_charset_table[1][0][c];
2462               else if (c >= '(' && c <= '/')
2463                 {
2464                   ONE_MORE_BYTE (c1);
2465                   if (c1 < ' ' || c1 >= 0x80
2466                       || (id = iso_charset_table[1][c >= ','][c1]) < 0)
2467                     /* Invalid designation sequence.  Just ignore.  */
2468                     break;
2469                 }
2470               else
2471                 /* Invalid designation sequence.  Just ignore it.  */
2472                 break;
2473             }
2474           else if (c == 'N' || c == 'O')
2475             {
2476               /* ESC <Fe> for SS2 or SS3.  */
2477               single_shifting = 1;
2478               rejected |= CATEGORY_MASK_ISO_7BIT | CATEGORY_MASK_ISO_8BIT;
2479               break;
2480             }
2481           else if (c >= '0' && c <= '4')
2482             {
2483               /* ESC <Fp> for start/end composition.  */
2484               found |= CATEGORY_MASK_ISO;
2485               break;
2486             }
2487           else
2488             {
2489               /* Invalid escape sequence.  Just ignore it.  */
2490               break;
2491             }
2492
2493           /* We found a valid designation sequence for CHARSET.  */
2494           rejected |= CATEGORY_MASK_ISO_8BIT;
2495           if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_7],
2496                               id))
2497             found |= CATEGORY_MASK_ISO_7;
2498           else
2499             rejected |= CATEGORY_MASK_ISO_7;
2500           if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_7_tight],
2501                               id))
2502             found |= CATEGORY_MASK_ISO_7_TIGHT;
2503           else
2504             rejected |= CATEGORY_MASK_ISO_7_TIGHT;
2505           if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_7_else],
2506                               id))
2507             found |= CATEGORY_MASK_ISO_7_ELSE;
2508           else
2509             rejected |= CATEGORY_MASK_ISO_7_ELSE;
2510           if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_8_else],
2511                               id))
2512             found |= CATEGORY_MASK_ISO_8_ELSE;
2513           else
2514             rejected |= CATEGORY_MASK_ISO_8_ELSE;
2515           break;
2516
2517         case ISO_CODE_SO:
2518         case ISO_CODE_SI:
2519           /* Locking shift out/in.  */
2520           if (inhibit_iso_escape_detection)
2521             break;
2522           single_shifting = 0;
2523           rejected |= CATEGORY_MASK_ISO_7BIT | CATEGORY_MASK_ISO_8BIT;
2524           found |= CATEGORY_MASK_ISO_ELSE;
2525           break;
2526
2527         case ISO_CODE_CSI:
2528           /* Control sequence introducer.  */
2529           single_shifting = 0;
2530           rejected |= CATEGORY_MASK_ISO_7BIT | CATEGORY_MASK_ISO_7_ELSE;
2531           found |= CATEGORY_MASK_ISO_8_ELSE;
2532           goto check_extra_latin;
2533
2534
2535         case ISO_CODE_SS2:
2536         case ISO_CODE_SS3:
2537           /* Single shift.   */
2538           if (inhibit_iso_escape_detection)
2539             break;
2540           single_shifting = 1;
2541           rejected |= CATEGORY_MASK_ISO_7BIT;
2542           if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_1])
2543               & CODING_ISO_FLAG_SINGLE_SHIFT)
2544             found |= CATEGORY_MASK_ISO_8_1;
2545           if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_2])
2546               & CODING_ISO_FLAG_SINGLE_SHIFT)
2547             found |= CATEGORY_MASK_ISO_8_2;
2548           goto check_extra_latin;
2549
2550         default:
2551           if (c < 0x80)
2552             {
2553               single_shifting = 0;
2554               break;
2555             }
2556           if (c >= 0xA0)
2557             {
2558               rejected |= CATEGORY_MASK_ISO_7BIT | CATEGORY_MASK_ISO_7_ELSE;
2559               found |= CATEGORY_MASK_ISO_8_1;
2560               /* Check the length of succeeding codes of the range
2561                  0xA0..0FF.  If the byte length is even, we include
2562                  CATEGORY_MASK_ISO_8_2 in `found'.  We can check this
2563                  only when we are not single shifting.  */
2564               if (! single_shifting
2565                   && ! (rejected & CATEGORY_MASK_ISO_8_2))
2566                 {
2567                   int i = 1;
2568                   while (src < src_end)
2569                     {
2570                       ONE_MORE_BYTE (c);
2571                       if (c < 0xA0)
2572                         break;
2573                       i++;
2574                     }
2575
2576                   if (i & 1 && src < src_end)
2577                     rejected |= CATEGORY_MASK_ISO_8_2;
2578                   else
2579                     found |= CATEGORY_MASK_ISO_8_2;
2580                 }
2581               break;
2582             }
2583         check_extra_latin:
2584           single_shifting = 0;
2585           if (! VECTORP (Vlatin_extra_code_table)
2586               || NILP (XVECTOR (Vlatin_extra_code_table)->contents[c]))
2587             {
2588               rejected = CATEGORY_MASK_ISO;
2589               break;
2590             }
2591           if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_1])
2592               & CODING_ISO_FLAG_LATIN_EXTRA)
2593             found |= CATEGORY_MASK_ISO_8_1;
2594           else
2595             rejected |= CATEGORY_MASK_ISO_8_1;
2596           if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_2])
2597               & CODING_ISO_FLAG_LATIN_EXTRA)
2598             found |= CATEGORY_MASK_ISO_8_2;
2599           else
2600             rejected |= CATEGORY_MASK_ISO_8_2;
2601         }
2602     }
2603   detect_info->rejected |= CATEGORY_MASK_ISO;
2604   return 0;
2605
2606  no_more_source:
2607   detect_info->rejected |= rejected;
2608   detect_info->found |= (found & ~rejected);
2609   return 1;
2610 }
2611
2612
2613 /* Set designation state into CODING.  */
2614 #define DECODE_DESIGNATION(reg, dim, chars_96, final)                   \
2615   do {                                                                  \
2616     int id, prev;                                                       \
2617                                                                         \
2618     if (final < '0' || final >= 128                                     \
2619         || ((id = ISO_CHARSET_TABLE (dim, chars_96, final)) < 0)        \
2620         || !SAFE_CHARSET_P (coding, id))                                \
2621       {                                                                 \
2622         CODING_ISO_DESIGNATION (coding, reg) = -2;                      \
2623         goto invalid_code;                                              \
2624       }                                                                 \
2625     prev = CODING_ISO_DESIGNATION (coding, reg);                        \
2626     if (id == charset_jisx0201_roman)                                   \
2627       {                                                                 \
2628         if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_USE_ROMAN)      \
2629           id = charset_ascii;                                           \
2630       }                                                                 \
2631     else if (id == charset_jisx0208_1978)                               \
2632       {                                                                 \
2633         if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_USE_OLDJIS)     \
2634           id = charset_jisx0208;                                        \
2635       }                                                                 \
2636     CODING_ISO_DESIGNATION (coding, reg) = id;                          \
2637     /* If there was an invalid designation to REG previously, and this  \
2638        designation is ASCII to REG, we should keep this designation     \
2639        sequence.  */                                                    \
2640     if (prev == -2 && id == charset_ascii)                              \
2641       goto invalid_code;                                                \
2642   } while (0)
2643
2644
2645 #define MAYBE_FINISH_COMPOSITION()                              \
2646   do {                                                          \
2647     int i;                                                      \
2648     if (composition_state == COMPOSING_NO)                      \
2649       break;                                                    \
2650     /* It is assured that we have enough room for producing     \
2651        characters stored in the table `components'.  */         \
2652     if (charbuf + component_idx > charbuf_end)                  \
2653       goto no_more_source;                                      \
2654     composition_state = COMPOSING_NO;                           \
2655     if (method == COMPOSITION_RELATIVE                          \
2656         || method == COMPOSITION_WITH_ALTCHARS)                 \
2657       {                                                         \
2658         for (i = 0; i < component_idx; i++)                     \
2659           *charbuf++ = components[i];                           \
2660         char_offset += component_idx;                           \
2661       }                                                         \
2662     else                                                        \
2663       {                                                         \
2664         for (i = 0; i < component_idx; i += 2)                  \
2665           *charbuf++ = components[i];                           \
2666         char_offset += (component_idx / 2) + 1;                 \
2667       }                                                         \
2668   } while (0)
2669
2670
2671 /* Handle composition start sequence ESC 0, ESC 2, ESC 3, or ESC 4.
2672    ESC 0 : relative composition : ESC 0 CHAR ... ESC 1
2673    ESC 2 : rulebase composition : ESC 2 CHAR RULE CHAR RULE ... CHAR ESC 1
2674    ESC 3 : altchar composition :  ESC 3 CHAR ... ESC 0 CHAR ... ESC 1
2675    ESC 4 : alt&rule composition : ESC 4 CHAR RULE ... CHAR ESC 0 CHAR ... ESC 1
2676   */
2677
2678 #define DECODE_COMPOSITION_START(c1)                                    \
2679   do {                                                                  \
2680     if (c1 == '0'                                                       \
2681         && composition_state == COMPOSING_COMPONENT_RULE)               \
2682       {                                                                 \
2683         component_len = component_idx;                                  \
2684         composition_state = COMPOSING_CHAR;                             \
2685       }                                                                 \
2686     else                                                                \
2687       {                                                                 \
2688         unsigned char *p;                                               \
2689                                                                         \
2690         MAYBE_FINISH_COMPOSITION ();                                    \
2691         if (charbuf + MAX_COMPOSITION_COMPONENTS > charbuf_end)         \
2692           goto no_more_source;                                          \
2693         for (p = src; p < src_end - 1; p++)                             \
2694           if (*p == ISO_CODE_ESC && p[1] == '1')                        \
2695             break;                                                      \
2696         if (p == src_end - 1)                                           \
2697           {                                                             \
2698             if (coding->mode & CODING_MODE_LAST_BLOCK)                  \
2699               goto invalid_code;                                        \
2700             goto no_more_source;                                        \
2701           }                                                             \
2702                                                                         \
2703         /* This is surely the start of a composition.  */               \
2704         method = (c1 == '0' ? COMPOSITION_RELATIVE                      \
2705                   : c1 == '2' ? COMPOSITION_WITH_RULE                   \
2706                   : c1 == '3' ? COMPOSITION_WITH_ALTCHARS               \
2707                   : COMPOSITION_WITH_RULE_ALTCHARS);                    \
2708         composition_state = (c1 <= '2' ? COMPOSING_CHAR                 \
2709                              : COMPOSING_COMPONENT_CHAR);               \
2710         component_idx = component_len = 0;                              \
2711       }                                                                 \
2712   } while (0)
2713
2714
2715 /* Handle compositoin end sequence ESC 1.  */
2716
2717 #define DECODE_COMPOSITION_END()                                        \
2718   do {                                                                  \
2719     int nchars = (component_len > 0 ? component_idx - component_len     \
2720                   : method == COMPOSITION_RELATIVE ? component_idx      \
2721                   : (component_idx + 1) / 2);                           \
2722     int i;                                                              \
2723     int *saved_charbuf = charbuf;                                       \
2724     int from = coding->produced_char + char_offset;                     \
2725     int to = from + nchars;                                             \
2726                                                                         \
2727     ADD_COMPOSITION_DATA (charbuf, from, to, method);                   \
2728     if (method != COMPOSITION_RELATIVE)                                 \
2729       {                                                                 \
2730         if (component_len == 0)                                         \
2731           for (i = 0; i < component_idx; i++)                           \
2732             *charbuf++ = components[i];                                 \
2733         else                                                            \
2734           for (i = 0; i < component_len; i++)                           \
2735             *charbuf++ = components[i];                                 \
2736         *saved_charbuf = saved_charbuf - charbuf;                       \
2737       }                                                                 \
2738     if (method == COMPOSITION_WITH_RULE)                                \
2739       for (i = 0; i < component_idx; i += 2, char_offset++)             \
2740         *charbuf++ = components[i];                                     \
2741     else                                                                \
2742       for (i = component_len; i < component_idx; i++, char_offset++)    \
2743         *charbuf++ = components[i];                                     \
2744     coding->annotated = 1;                                              \
2745     composition_state = COMPOSING_NO;                                   \
2746   } while (0)
2747
2748
2749 /* Decode a composition rule from the byte C1 (and maybe one more byte
2750    from SRC) and store one encoded composition rule in
2751    coding->cmp_data.  */
2752
2753 #define DECODE_COMPOSITION_RULE(c1)                                     \
2754   do {                                                                  \
2755     (c1) -= 32;                                                         \
2756     if (c1 < 81)                /* old format (before ver.21) */        \
2757       {                                                                 \
2758         int gref = (c1) / 9;                                            \
2759         int nref = (c1) % 9;                                            \
2760         if (gref == 4) gref = 10;                                       \
2761         if (nref == 4) nref = 10;                                       \
2762         c1 = COMPOSITION_ENCODE_RULE (gref, nref);                      \
2763       }                                                                 \
2764     else if (c1 < 93)           /* new format (after ver.21) */         \
2765       {                                                                 \
2766         ONE_MORE_BYTE (c2);                                             \
2767         c1 = COMPOSITION_ENCODE_RULE (c1 - 81, c2 - 32);                \
2768       }                                                                 \
2769     else                                                                \
2770       c1 = 0;                                                           \
2771   } while (0)
2772
2773
2774 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".  */
2775
2776 static void
2777 decode_coding_iso_2022 (coding)
2778      struct coding_system *coding;
2779 {
2780   unsigned char *src = coding->source + coding->consumed;
2781   unsigned char *src_end = coding->source + coding->src_bytes;
2782   unsigned char *src_base;
2783   int *charbuf = coding->charbuf;
2784   int *charbuf_end
2785     = charbuf + coding->charbuf_size - 4 - MAX_ANNOTATION_LENGTH;
2786   int consumed_chars = 0, consumed_chars_base;
2787   int multibytep = coding->src_multibyte;
2788   /* Charsets invoked to graphic plane 0 and 1 respectively.  */
2789   int charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
2790   int charset_id_1 = CODING_ISO_INVOKED_CHARSET (coding, 1);
2791   struct charset *charset;
2792   int c;
2793   /* For handling composition sequence.  */
2794 #define COMPOSING_NO                    0
2795 #define COMPOSING_CHAR                  1
2796 #define COMPOSING_RULE                  2
2797 #define COMPOSING_COMPONENT_CHAR        3
2798 #define COMPOSING_COMPONENT_RULE        4
2799
2800   int composition_state = COMPOSING_NO;
2801   enum composition_method method;
2802   int components[MAX_COMPOSITION_COMPONENTS * 2 + 1];
2803   int component_idx;
2804   int component_len;
2805   Lisp_Object attrs, eol_type, charset_list;
2806   int char_offset = coding->produced_char;
2807   int last_offset = char_offset;
2808   int last_id = charset_ascii;
2809
2810   CODING_GET_INFO (coding, attrs, eol_type, charset_list);
2811   setup_iso_safe_charsets (attrs);
2812
2813   while (1)
2814     {
2815       int c1, c2;
2816
2817       src_base = src;
2818       consumed_chars_base = consumed_chars;
2819
2820       if (charbuf >= charbuf_end)
2821         break;
2822
2823       ONE_MORE_BYTE (c1);
2824
2825       /* We produce at most one character.  */
2826       switch (iso_code_class [c1])
2827         {
2828         case ISO_0x20_or_0x7F:
2829           if (composition_state != COMPOSING_NO)
2830             {
2831               if (composition_state == COMPOSING_RULE
2832                   || composition_state == COMPOSING_COMPONENT_RULE)
2833                 {
2834                   DECODE_COMPOSITION_RULE (c1);
2835                   components[component_idx++] = c1;
2836                   composition_state--;
2837                   continue;
2838                 }
2839             }
2840           if (charset_id_0 < 0
2841               || ! CHARSET_ISO_CHARS_96 (CHARSET_FROM_ID (charset_id_0)))
2842             /* This is SPACE or DEL.  */
2843             charset = CHARSET_FROM_ID (charset_ascii);
2844           else
2845             charset = CHARSET_FROM_ID (charset_id_0);
2846           break;
2847
2848         case ISO_graphic_plane_0:
2849           if (composition_state != COMPOSING_NO)
2850             {
2851               if (composition_state == COMPOSING_RULE
2852                   || composition_state == COMPOSING_COMPONENT_RULE)
2853                 {
2854                   DECODE_COMPOSITION_RULE (c1);
2855                   components[component_idx++] = c1;
2856                   composition_state--;
2857                   continue;
2858                 }
2859             }
2860           charset = CHARSET_FROM_ID (charset_id_0);
2861           break;
2862
2863         case ISO_0xA0_or_0xFF:
2864           if (charset_id_1 < 0
2865               || ! CHARSET_ISO_CHARS_96 (CHARSET_FROM_ID (charset_id_1))
2866               || CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS)
2867             goto invalid_code;
2868           /* This is a graphic character, we fall down ... */
2869
2870         case ISO_graphic_plane_1:
2871           if (charset_id_1 < 0)
2872             goto invalid_code;
2873           charset = CHARSET_FROM_ID (charset_id_1);
2874           break;
2875
2876         case ISO_carriage_return:
2877           if (c1 == '\r')
2878             {
2879               if (EQ (eol_type, Qdos))
2880                 {
2881                   if (src == src_end)
2882                     {
2883                       coding->result = CODING_RESULT_INSUFFICIENT_SRC;
2884                       goto no_more_source;
2885                     }
2886                   if (*src == '\n')
2887                     ONE_MORE_BYTE (c1);
2888                 }
2889               else if (EQ (eol_type, Qmac))
2890                 c1 = '\n';
2891             }
2892           /* fall through */
2893
2894         case ISO_control_0:
2895           MAYBE_FINISH_COMPOSITION ();
2896           charset = CHARSET_FROM_ID (charset_ascii);
2897           break;
2898
2899         case ISO_control_1:
2900           MAYBE_FINISH_COMPOSITION ();
2901           goto invalid_code;
2902
2903         case ISO_shift_out:
2904           if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LOCKING_SHIFT)
2905               || CODING_ISO_DESIGNATION (coding, 1) < 0)
2906             goto invalid_code;
2907           CODING_ISO_INVOCATION (coding, 0) = 1;
2908           charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
2909           continue;
2910
2911         case ISO_shift_in:
2912           if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LOCKING_SHIFT))
2913             goto invalid_code;
2914           CODING_ISO_INVOCATION (coding, 0) = 0;
2915           charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
2916           continue;
2917
2918         case ISO_single_shift_2_7:
2919         case ISO_single_shift_2:
2920           if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT))
2921             goto invalid_code;
2922           /* SS2 is handled as an escape sequence of ESC 'N' */
2923           c1 = 'N';
2924           goto label_escape_sequence;
2925
2926         case ISO_single_shift_3:
2927           if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT))
2928             goto invalid_code;
2929           /* SS2 is handled as an escape sequence of ESC 'O' */
2930           c1 = 'O';
2931           goto label_escape_sequence;
2932
2933         case ISO_control_sequence_introducer:
2934           /* CSI is handled as an escape sequence of ESC '[' ...  */
2935           c1 = '[';
2936           goto label_escape_sequence;
2937
2938         case ISO_escape:
2939           ONE_MORE_BYTE (c1);
2940         label_escape_sequence:
2941           /* Escape sequences handled here are invocation,
2942              designation, direction specification, and character
2943              composition specification.  */
2944           switch (c1)
2945             {
2946             case '&':           /* revision of following character set */
2947               ONE_MORE_BYTE (c1);
2948               if (!(c1 >= '@' && c1 <= '~'))
2949                 goto invalid_code;
2950               ONE_MORE_BYTE (c1);
2951               if (c1 != ISO_CODE_ESC)
2952                 goto invalid_code;
2953               ONE_MORE_BYTE (c1);
2954               goto label_escape_sequence;
2955
2956             case '$':           /* designation of 2-byte character set */
2957               if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DESIGNATION))
2958                 goto invalid_code;
2959               ONE_MORE_BYTE (c1);
2960               if (c1 >= '@' && c1 <= 'B')
2961                 {       /* designation of JISX0208.1978, GB2312.1980,
2962                            or JISX0208.1980 */
2963                   DECODE_DESIGNATION (0, 2, 0, c1);
2964                 }
2965               else if (c1 >= 0x28 && c1 <= 0x2B)
2966                 {       /* designation of DIMENSION2_CHARS94 character set */
2967                   ONE_MORE_BYTE (c2);
2968                   DECODE_DESIGNATION (c1 - 0x28, 2, 0, c2);
2969                 }
2970               else if (c1 >= 0x2C && c1 <= 0x2F)
2971                 {       /* designation of DIMENSION2_CHARS96 character set */
2972                   ONE_MORE_BYTE (c2);
2973                   DECODE_DESIGNATION (c1 - 0x2C, 2, 1, c2);
2974                 }
2975               else
2976                 goto invalid_code;
2977               /* We must update these variables now.  */
2978               charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
2979               charset_id_1 = CODING_ISO_INVOKED_CHARSET (coding, 1);
2980               continue;
2981
2982             case 'n':           /* invocation of locking-shift-2 */
2983               if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LOCKING_SHIFT)
2984                   || CODING_ISO_DESIGNATION (coding, 2) < 0)
2985                 goto invalid_code;
2986               CODING_ISO_INVOCATION (coding, 0) = 2;
2987               charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
2988               continue;
2989
2990             case 'o':           /* invocation of locking-shift-3 */
2991               if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LOCKING_SHIFT)
2992                   || CODING_ISO_DESIGNATION (coding, 3) < 0)
2993                 goto invalid_code;
2994               CODING_ISO_INVOCATION (coding, 0) = 3;
2995               charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
2996               continue;
2997
2998             case 'N':           /* invocation of single-shift-2 */
2999               if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT)
3000                   || CODING_ISO_DESIGNATION (coding, 2) < 0)
3001                 goto invalid_code;
3002               charset = CHARSET_FROM_ID (CODING_ISO_DESIGNATION (coding, 2));
3003               ONE_MORE_BYTE (c1);
3004               if (c1 < 0x20 || (c1 >= 0x80 && c1 < 0xA0))
3005                 goto invalid_code;
3006               break;
3007
3008             case 'O':           /* invocation of single-shift-3 */
3009               if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT)
3010                   || CODING_ISO_DESIGNATION (coding, 3) < 0)
3011                 goto invalid_code;
3012               charset = CHARSET_FROM_ID (CODING_ISO_DESIGNATION (coding, 3));
3013               ONE_MORE_BYTE (c1);
3014               if (c1 < 0x20 || (c1 >= 0x80 && c1 < 0xA0))
3015                 goto invalid_code;
3016               break;
3017
3018             case '0': case '2': case '3': case '4': /* start composition */
3019               if (! (coding->common_flags & CODING_ANNOTATE_COMPOSITION_MASK))
3020                 goto invalid_code;
3021               DECODE_COMPOSITION_START (c1);
3022               continue;
3023
3024             case '1':           /* end composition */
3025               if (composition_state == COMPOSING_NO)
3026                 goto invalid_code;
3027               DECODE_COMPOSITION_END ();
3028               continue;
3029
3030             case '[':           /* specification of direction */
3031               if (! CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DIRECTION)
3032                 goto invalid_code;
3033               /* For the moment, nested direction is not supported.
3034                  So, `coding->mode & CODING_MODE_DIRECTION' zero means
3035                  left-to-right, and nozero means right-to-left.  */
3036               ONE_MORE_BYTE (c1);
3037               switch (c1)
3038                 {
3039                 case ']':       /* end of the current direction */
3040                   coding->mode &= ~CODING_MODE_DIRECTION;
3041
3042                 case '0':       /* end of the current direction */
3043                 case '1':       /* start of left-to-right direction */
3044                   ONE_MORE_BYTE (c1);
3045                   if (c1 == ']')
3046                     coding->mode &= ~CODING_MODE_DIRECTION;
3047                   else
3048                     goto invalid_code;
3049                   break;
3050
3051                 case '2':       /* start of right-to-left direction */
3052                   ONE_MORE_BYTE (c1);
3053                   if (c1 == ']')
3054                     coding->mode |= CODING_MODE_DIRECTION;
3055                   else
3056                     goto invalid_code;
3057                   break;
3058
3059                 default:
3060                   goto invalid_code;
3061                 }
3062               continue;
3063
3064             case '%':
3065               ONE_MORE_BYTE (c1);
3066               if (c1 == '/')
3067                 {
3068                   /* CTEXT extended segment:
3069                      ESC % / [0-4] M L --ENCODING-NAME-- \002 --BYTES--
3070                      We keep these bytes as is for the moment.
3071                      They may be decoded by post-read-conversion.  */
3072                   int dim, M, L;
3073                   int size;
3074
3075                   ONE_MORE_BYTE (dim);
3076                   ONE_MORE_BYTE (M);
3077                   ONE_MORE_BYTE (L);
3078                   size = ((M - 128) * 128) + (L - 128);
3079                   if (charbuf + 8 + size > charbuf_end)
3080                     goto break_loop;
3081                   *charbuf++ = ISO_CODE_ESC;
3082                   *charbuf++ = '%';
3083                   *charbuf++ = '/';
3084                   *charbuf++ = dim;
3085                   *charbuf++ = BYTE8_TO_CHAR (M);
3086                   *charbuf++ = BYTE8_TO_CHAR (L);
3087                   while (size-- > 0)
3088                     {
3089                       ONE_MORE_BYTE (c1);
3090                       *charbuf++ = ASCII_BYTE_P (c1) ? c1 : BYTE8_TO_CHAR (c1);
3091                     }
3092                 }
3093               else if (c1 == 'G')
3094                 {
3095                   /* XFree86 extension for embedding UTF-8 in CTEXT:
3096                      ESC % G --UTF-8-BYTES-- ESC % @
3097                      We keep these bytes as is for the moment.
3098                      They may be decoded by post-read-conversion.  */
3099                   int *p = charbuf;
3100
3101                   if (p + 6 > charbuf_end)
3102                     goto break_loop;
3103                   *p++ = ISO_CODE_ESC;
3104                   *p++ = '%';
3105                   *p++ = 'G';
3106                   while (p < charbuf_end)
3107                     {
3108                       ONE_MORE_BYTE (c1);
3109                       if (c1 == ISO_CODE_ESC
3110                           && src + 1 < src_end
3111                           && src[0] == '%'
3112                           && src[1] == '@')
3113                         break;
3114                       *p++ = ASCII_BYTE_P (c1) ? c1 : BYTE8_TO_CHAR (c1);
3115                     }
3116                   if (p + 3 > charbuf_end)
3117                     goto break_loop;
3118                   *p++ = ISO_CODE_ESC;
3119                   *p++ = '%';
3120                   *p++ = '@';
3121                   charbuf = p;
3122                 }
3123               else
3124                 goto invalid_code;
3125               continue;
3126               break;
3127
3128             default:
3129               if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DESIGNATION))
3130                 goto invalid_code;
3131               if (c1 >= 0x28 && c1 <= 0x2B)
3132                 {       /* designation of DIMENSION1_CHARS94 character set */
3133                   ONE_MORE_BYTE (c2);
3134                   DECODE_DESIGNATION (c1 - 0x28, 1, 0, c2);
3135                 }
3136               else if (c1 >= 0x2C && c1 <= 0x2F)
3137                 {       /* designation of DIMENSION1_CHARS96 character set */
3138                   ONE_MORE_BYTE (c2);
3139                   DECODE_DESIGNATION (c1 - 0x2C, 1, 1, c2);
3140                 }
3141               else
3142                 goto invalid_code;
3143               /* We must update these variables now.  */
3144               charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
3145               charset_id_1 = CODING_ISO_INVOKED_CHARSET (coding, 1);
3146               continue;
3147             }
3148         }
3149
3150       if (charset->id != charset_ascii
3151           && last_id != charset->id)
3152         {
3153           if (last_id != charset_ascii)
3154             ADD_CHARSET_DATA (charbuf, last_offset, char_offset, last_id);
3155           last_id = charset->id;
3156           last_offset = char_offset;
3157         }
3158
3159       /* Now we know CHARSET and 1st position code C1 of a character.
3160          Produce a decoded character while getting 2nd position code
3161          C2 if necessary.  */
3162       c1 &= 0x7F;
3163       if (CHARSET_DIMENSION (charset) > 1)
3164         {
3165           ONE_MORE_BYTE (c2);
3166           if (c2 < 0x20 || (c2 >= 0x80 && c2 < 0xA0))
3167             /* C2 is not in a valid range.  */
3168             goto invalid_code;
3169           c1 = (c1 << 8) | (c2 & 0x7F);
3170           if (CHARSET_DIMENSION (charset) > 2)
3171             {
3172               ONE_MORE_BYTE (c2);
3173               if (c2 < 0x20 || (c2 >= 0x80 && c2 < 0xA0))
3174                 /* C2 is not in a valid range.  */
3175                 goto invalid_code;
3176               c1 = (c1 << 8) | (c2 & 0x7F);
3177             }
3178         }
3179
3180       CODING_DECODE_CHAR (coding, src, src_base, src_end, charset, c1, c);
3181       if (c < 0)
3182         {
3183           MAYBE_FINISH_COMPOSITION ();
3184           for (; src_base < src; src_base++, char_offset++)
3185             {
3186               if (ASCII_BYTE_P (*src_base))
3187                 *charbuf++ = *src_base;
3188               else
3189                 *charbuf++ = BYTE8_TO_CHAR (*src_base);
3190             }
3191         }
3192       else if (composition_state == COMPOSING_NO)
3193         {
3194           *charbuf++ = c;
3195           char_offset++;
3196         }
3197       else
3198         {
3199           components[component_idx++] = c;
3200           if (method == COMPOSITION_WITH_RULE
3201               || (method == COMPOSITION_WITH_RULE_ALTCHARS
3202                   && composition_state == COMPOSING_COMPONENT_CHAR))
3203             composition_state++;
3204         }
3205       continue;
3206
3207     invalid_code:
3208       MAYBE_FINISH_COMPOSITION ();
3209       src = src_base;
3210       consumed_chars = consumed_chars_base;
3211       ONE_MORE_BYTE (c);
3212       *charbuf++ = ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
3213       char_offset++;
3214       coding->errors++;
3215       continue;
3216
3217     break_loop:
3218       break;
3219     }
3220
3221  no_more_source:
3222   if (last_id != charset_ascii)
3223     ADD_CHARSET_DATA (charbuf, last_offset, char_offset, last_id);
3224   coding->consumed_char += consumed_chars_base;
3225   coding->consumed = src_base - coding->source;
3226   coding->charbuf_used = charbuf - coding->charbuf;
3227 }
3228
3229
3230 /* ISO2022 encoding stuff.  */
3231
3232 /*
3233    It is not enough to say just "ISO2022" on encoding, we have to
3234    specify more details.  In Emacs, each coding system of ISO2022
3235    variant has the following specifications:
3236         1. Initial designation to G0 thru G3.
3237         2. Allows short-form designation?
3238         3. ASCII should be designated to G0 before control characters?
3239         4. ASCII should be designated to G0 at end of line?
3240         5. 7-bit environment or 8-bit environment?
3241         6. Use locking-shift?
3242         7. Use Single-shift?
3243    And the following two are only for Japanese:
3244         8. Use ASCII in place of JIS0201-1976-Roman?
3245         9. Use JISX0208-1983 in place of JISX0208-1978?
3246    These specifications are encoded in CODING_ISO_FLAGS (coding) as flag bits
3247    defined by macros CODING_ISO_FLAG_XXX.  See `coding.h' for more
3248    details.
3249 */
3250
3251 /* Produce codes (escape sequence) for designating CHARSET to graphic
3252    register REG at DST, and increment DST.  If <final-char> of CHARSET is
3253    '@', 'A', or 'B' and the coding system CODING allows, produce
3254    designation sequence of short-form.  */
3255
3256 #define ENCODE_DESIGNATION(charset, reg, coding)                        \
3257   do {                                                                  \
3258     unsigned char final_char = CHARSET_ISO_FINAL (charset);             \
3259     char *intermediate_char_94 = "()*+";                                \
3260     char *intermediate_char_96 = ",-./";                                \
3261     int revision = -1;                                                  \
3262     int c;                                                              \
3263                                                                         \
3264     if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_REVISION)           \
3265       revision = CHARSET_ISO_REVISION (charset);                        \
3266                                                                         \
3267     if (revision >= 0)                                                  \
3268       {                                                                 \
3269         EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, '&');                       \
3270         EMIT_ONE_BYTE ('@' + revision);                                 \
3271       }                                                                 \
3272     EMIT_ONE_ASCII_BYTE (ISO_CODE_ESC);                                 \
3273     if (CHARSET_DIMENSION (charset) == 1)                               \
3274       {                                                                 \
3275         if (! CHARSET_ISO_CHARS_96 (charset))                           \
3276           c = intermediate_char_94[reg];                                \
3277         else                                                            \
3278           c = intermediate_char_96[reg];                                \
3279         EMIT_ONE_ASCII_BYTE (c);                                        \
3280       }                                                                 \
3281     else                                                                \
3282       {                                                                 \
3283         EMIT_ONE_ASCII_BYTE ('$');                                      \
3284         if (! CHARSET_ISO_CHARS_96 (charset))                           \
3285           {                                                             \
3286             if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LONG_FORM   \
3287                 || reg != 0                                             \
3288                 || final_char < '@' || final_char > 'B')                \
3289               EMIT_ONE_ASCII_BYTE (intermediate_char_94[reg]);          \
3290           }                                                             \
3291         else                                                            \
3292           EMIT_ONE_ASCII_BYTE (intermediate_char_96[reg]);              \
3293       }                                                                 \
3294     EMIT_ONE_ASCII_BYTE (final_char);                                   \
3295                                                                         \
3296     CODING_ISO_DESIGNATION (coding, reg) = CHARSET_ID (charset);        \
3297   } while (0)
3298
3299
3300 /* The following two macros produce codes (control character or escape
3301    sequence) for ISO2022 single-shift functions (single-shift-2 and
3302    single-shift-3).  */
3303
3304 #define ENCODE_SINGLE_SHIFT_2                                           \
3305   do {                                                                  \
3306     if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS)         \
3307       EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'N');                         \
3308     else                                                                \
3309       EMIT_ONE_BYTE (ISO_CODE_SS2);                                     \
3310     CODING_ISO_SINGLE_SHIFTING (coding) = 1;                            \
3311   } while (0)
3312
3313
3314 #define ENCODE_SINGLE_SHIFT_3                                           \
3315   do {                                                                  \
3316     if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS)         \
3317       EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'O');                         \
3318     else                                                                \
3319       EMIT_ONE_BYTE (ISO_CODE_SS3);                                     \
3320     CODING_ISO_SINGLE_SHIFTING (coding) = 1;                            \
3321   } while (0)
3322
3323
3324 /* The following four macros produce codes (control character or
3325    escape sequence) for ISO2022 locking-shift functions (shift-in,
3326    shift-out, locking-shift-2, and locking-shift-3).  */
3327
3328 #define ENCODE_SHIFT_IN                                 \
3329   do {                                                  \
3330     EMIT_ONE_ASCII_BYTE (ISO_CODE_SI);                  \
3331     CODING_ISO_INVOCATION (coding, 0) = 0;              \
3332   } while (0)
3333
3334
3335 #define ENCODE_SHIFT_OUT                                \
3336   do {                                                  \
3337     EMIT_ONE_ASCII_BYTE (ISO_CODE_SO);                  \
3338     CODING_ISO_INVOCATION (coding, 0) = 1;              \
3339   } while (0)
3340
3341
3342 #define ENCODE_LOCKING_SHIFT_2                          \
3343   do {                                                  \
3344     EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'n');           \
3345     CODING_ISO_INVOCATION (coding, 0) = 2;              \
3346   } while (0)
3347
3348
3349 #define ENCODE_LOCKING_SHIFT_3                          \
3350   do {                                                  \
3351     EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'n');           \
3352     CODING_ISO_INVOCATION (coding, 0) = 3;              \
3353   } while (0)
3354
3355
3356 /* Produce codes for a DIMENSION1 character whose character set is
3357    CHARSET and whose position-code is C1.  Designation and invocation
3358    sequences are also produced in advance if necessary.  */
3359
3360 #define ENCODE_ISO_CHARACTER_DIMENSION1(charset, c1)                    \
3361   do {                                                                  \
3362     int id = CHARSET_ID (charset);                                      \
3363                                                                         \
3364     if ((CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_USE_ROMAN)         \
3365         && id == charset_ascii)                                         \
3366       {                                                                 \
3367         id = charset_jisx0201_roman;                                    \
3368         charset = CHARSET_FROM_ID (id);                                 \
3369       }                                                                 \
3370                                                                         \
3371     if (CODING_ISO_SINGLE_SHIFTING (coding))                            \
3372       {                                                                 \
3373         if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS)     \
3374           EMIT_ONE_ASCII_BYTE (c1 & 0x7F);                              \
3375         else                                                            \
3376           EMIT_ONE_BYTE (c1 | 0x80);                                    \
3377         CODING_ISO_SINGLE_SHIFTING (coding) = 0;                        \
3378         break;                                                          \
3379       }                                                                 \
3380     else if (id == CODING_ISO_INVOKED_CHARSET (coding, 0))              \
3381       {                                                                 \
3382         EMIT_ONE_ASCII_BYTE (c1 & 0x7F);                                \
3383         break;                                                          \
3384       }                                                                 \
3385     else if (id == CODING_ISO_INVOKED_CHARSET (coding, 1))              \
3386       {                                                                 \
3387         EMIT_ONE_BYTE (c1 | 0x80);                                      \
3388         break;                                                          \
3389       }                                                                 \
3390     else                                                                \
3391       /* Since CHARSET is not yet invoked to any graphic planes, we     \
3392          must invoke it, or, at first, designate it to some graphic     \
3393          register.  Then repeat the loop to actually produce the        \
3394          character.  */                                                 \
3395       dst = encode_invocation_designation (charset, coding, dst,        \
3396                                            &produced_chars);            \
3397   } while (1)
3398
3399
3400 /* Produce codes for a DIMENSION2 character whose character set is
3401    CHARSET and whose position-codes are C1 and C2.  Designation and
3402    invocation codes are also produced in advance if necessary.  */
3403
3404 #define ENCODE_ISO_CHARACTER_DIMENSION2(charset, c1, c2)                \
3405   do {                                                                  \
3406     int id = CHARSET_ID (charset);                                      \
3407                                                                         \
3408     if ((CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_USE_OLDJIS)        \
3409         && id == charset_jisx0208)                                      \
3410       {                                                                 \
3411         id = charset_jisx0208_1978;                                     \
3412         charset = CHARSET_FROM_ID (id);                                 \
3413       }                                                                 \
3414                                                                         \
3415     if (CODING_ISO_SINGLE_SHIFTING (coding))                            \
3416       {                                                                 \
3417         if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS)     \
3418           EMIT_TWO_ASCII_BYTES ((c1) & 0x7F, (c2) & 0x7F);              \
3419         else                                                            \
3420           EMIT_TWO_BYTES ((c1) | 0x80, (c2) | 0x80);                    \
3421         CODING_ISO_SINGLE_SHIFTING (coding) = 0;                        \
3422         break;                                                          \
3423       }                                                                 \
3424     else if (id == CODING_ISO_INVOKED_CHARSET (coding, 0))              \
3425       {                                                                 \
3426         EMIT_TWO_ASCII_BYTES ((c1) & 0x7F, (c2) & 0x7F);                \
3427         break;                                                          \
3428       }                                                                 \
3429     else if (id == CODING_ISO_INVOKED_CHARSET (coding, 1))              \
3430       {                                                                 \
3431         EMIT_TWO_BYTES ((c1) | 0x80, (c2) | 0x80);                      \
3432         break;                                                          \
3433       }                                                                 \
3434     else                                                                \
3435       /* Since CHARSET is not yet invoked to any graphic planes, we     \
3436          must invoke it, or, at first, designate it to some graphic     \
3437          register.  Then repeat the loop to actually produce the        \
3438          character.  */                                                 \
3439       dst = encode_invocation_designation (charset, coding, dst,        \
3440                                            &produced_chars);            \
3441   } while (1)
3442
3443
3444 #define ENCODE_ISO_CHARACTER(charset, c)                                   \
3445   do {                                                                     \
3446     int code = ENCODE_CHAR ((charset),(c));                                \
3447                                                                            \
3448     if (CHARSET_DIMENSION (charset) == 1)                                  \
3449       ENCODE_ISO_CHARACTER_DIMENSION1 ((charset), code);                   \
3450     else                                                                   \
3451       ENCODE_ISO_CHARACTER_DIMENSION2 ((charset), code >> 8, code & 0xFF); \
3452   } while (0)
3453
3454
3455 /* Produce designation and invocation codes at a place pointed by DST
3456    to use CHARSET.  The element `spec.iso_2022' of *CODING is updated.
3457    Return new DST.  */
3458
3459 unsigned char *
3460 encode_invocation_designation (charset, coding, dst, p_nchars)
3461      struct charset *charset;
3462      struct coding_system *coding;
3463      unsigned char *dst;
3464      int *p_nchars;
3465 {
3466   int multibytep = coding->dst_multibyte;
3467   int produced_chars = *p_nchars;
3468   int reg;                      /* graphic register number */
3469   int id = CHARSET_ID (charset);
3470
3471   /* At first, check designations.  */
3472   for (reg = 0; reg < 4; reg++)
3473     if (id == CODING_ISO_DESIGNATION (coding, reg))
3474       break;
3475
3476   if (reg >= 4)
3477     {
3478       /* CHARSET is not yet designated to any graphic registers.  */
3479       /* At first check the requested designation.  */
3480       reg = CODING_ISO_REQUEST (coding, id);
3481       if (reg < 0)
3482         /* Since CHARSET requests no special designation, designate it
3483            to graphic register 0.  */
3484         reg = 0;
3485
3486       ENCODE_DESIGNATION (charset, reg, coding);
3487     }
3488
3489   if (CODING_ISO_INVOCATION (coding, 0) != reg
3490       && CODING_ISO_INVOCATION (coding, 1) != reg)
3491     {
3492       /* Since the graphic register REG is not invoked to any graphic
3493          planes, invoke it to graphic plane 0.  */
3494       switch (reg)
3495         {
3496         case 0:                 /* graphic register 0 */
3497           ENCODE_SHIFT_IN;
3498           break;
3499
3500         case 1:                 /* graphic register 1 */
3501           ENCODE_SHIFT_OUT;
3502           break;
3503
3504         case 2:                 /* graphic register 2 */
3505           if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT)
3506             ENCODE_SINGLE_SHIFT_2;
3507           else
3508             ENCODE_LOCKING_SHIFT_2;
3509           break;
3510
3511         case 3:                 /* graphic register 3 */
3512           if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT)
3513             ENCODE_SINGLE_SHIFT_3;
3514           else
3515             ENCODE_LOCKING_SHIFT_3;
3516           break;
3517         }
3518     }
3519
3520   *p_nchars = produced_chars;
3521   return dst;
3522 }
3523
3524 /* The following three macros produce codes for indicating direction
3525    of text.  */
3526 #define ENCODE_CONTROL_SEQUENCE_INTRODUCER                              \
3527   do {                                                                  \
3528     if (CODING_ISO_FLAGS (coding) == CODING_ISO_FLAG_SEVEN_BITS)        \
3529       EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, '[');                         \
3530     else                                                                \
3531       EMIT_ONE_BYTE (ISO_CODE_CSI);                                     \
3532   } while (0)
3533
3534
3535 #define ENCODE_DIRECTION_R2L()                  \
3536   do {                                          \
3537     ENCODE_CONTROL_SEQUENCE_INTRODUCER (dst);   \
3538     EMIT_TWO_ASCII_BYTES ('2', ']');            \
3539   } while (0)
3540
3541
3542 #define ENCODE_DIRECTION_L2R()                  \
3543   do {                                          \
3544     ENCODE_CONTROL_SEQUENCE_INTRODUCER (dst);   \
3545     EMIT_TWO_ASCII_BYTES ('0', ']');            \
3546   } while (0)
3547
3548
3549 /* Produce codes for designation and invocation to reset the graphic
3550    planes and registers to initial state.  */
3551 #define ENCODE_RESET_PLANE_AND_REGISTER()                               \
3552   do {                                                                  \
3553     int reg;                                                            \
3554     struct charset *charset;                                            \
3555                                                                         \
3556     if (CODING_ISO_INVOCATION (coding, 0) != 0)                         \
3557       ENCODE_SHIFT_IN;                                                  \
3558     for (reg = 0; reg < 4; reg++)                                       \
3559       if (CODING_ISO_INITIAL (coding, reg) >= 0                         \
3560           && (CODING_ISO_DESIGNATION (coding, reg)                      \
3561               != CODING_ISO_INITIAL (coding, reg)))                     \
3562         {                                                               \
3563           charset = CHARSET_FROM_ID (CODING_ISO_INITIAL (coding, reg)); \
3564           ENCODE_DESIGNATION (charset, reg, coding);                    \
3565         }                                                               \
3566   } while (0)
3567
3568
3569 /* Produce designation sequences of charsets in the line started from
3570    SRC to a place pointed by DST, and return updated DST.
3571
3572    If the current block ends before any end-of-line, we may fail to
3573    find all the necessary designations.  */
3574
3575 static unsigned char *
3576 encode_designation_at_bol (coding, charbuf, charbuf_end, dst)
3577      struct coding_system *coding;
3578      int *charbuf, *charbuf_end;
3579      unsigned char *dst;
3580 {
3581   struct charset *charset;
3582   /* Table of charsets to be designated to each graphic register.  */
3583   int r[4];
3584   int c, found = 0, reg;
3585   int produced_chars = 0;
3586   int multibytep = coding->dst_multibyte;
3587   Lisp_Object attrs;
3588   Lisp_Object charset_list;
3589
3590   attrs = CODING_ID_ATTRS (coding->id);
3591   charset_list = CODING_ATTR_CHARSET_LIST (attrs);
3592   if (EQ (charset_list, Qiso_2022))
3593     charset_list = Viso_2022_charset_list;
3594
3595   for (reg = 0; reg < 4; reg++)
3596     r[reg] = -1;
3597
3598   while (found < 4)
3599     {
3600       int id;
3601
3602       c = *charbuf++;
3603       if (c == '\n')
3604         break;
3605       charset = char_charset (c, charset_list, NULL);
3606       id = CHARSET_ID (charset);
3607       reg = CODING_ISO_REQUEST (coding, id);
3608       if (reg >= 0 && r[reg] < 0)
3609         {
3610           found++;
3611           r[reg] = id;
3612         }
3613     }
3614
3615   if (found)
3616     {
3617       for (reg = 0; reg < 4; reg++)
3618         if (r[reg] >= 0
3619             && CODING_ISO_DESIGNATION (coding, reg) != r[reg])
3620           ENCODE_DESIGNATION (CHARSET_FROM_ID (r[reg]), reg, coding);
3621     }
3622
3623   return dst;
3624 }
3625
3626 /* See the above "GENERAL NOTES on `encode_coding_XXX ()' functions".  */
3627
3628 static int
3629 encode_coding_iso_2022 (coding)
3630      struct coding_system *coding;
3631 {
3632   int multibytep = coding->dst_multibyte;
3633   int *charbuf = coding->charbuf;
3634   int *charbuf_end = charbuf + coding->charbuf_used;
3635   unsigned char *dst = coding->destination + coding->produced;
3636   unsigned char *dst_end = coding->destination + coding->dst_bytes;
3637   int safe_room = 16;
3638   int bol_designation
3639     = (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DESIGNATE_AT_BOL
3640        && CODING_ISO_BOL (coding));
3641   int produced_chars = 0;
3642   Lisp_Object attrs, eol_type, charset_list;
3643   int ascii_compatible;
3644   int c;
3645   int preferred_charset_id = -1;
3646
3647   CODING_GET_INFO (coding, attrs, eol_type, charset_list);
3648   setup_iso_safe_charsets (attrs);
3649   /* Charset list may have been changed.  */
3650   charset_list = CODING_ATTR_CHARSET_LIST (attrs);              \
3651   coding->safe_charsets
3652     = (char *) XSTRING (CODING_ATTR_SAFE_CHARSETS(attrs))->data;
3653
3654   ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
3655
3656   while (charbuf < charbuf_end)
3657     {
3658       ASSURE_DESTINATION (safe_room);
3659
3660       if (bol_designation)
3661         {
3662           unsigned char *dst_prev = dst;
3663
3664           /* We have to produce designation sequences if any now.  */
3665           dst = encode_designation_at_bol (coding, charbuf, charbuf_end, dst);
3666           bol_designation = 0;
3667           /* We are sure that designation sequences are all ASCII bytes.  */
3668           produced_chars += dst - dst_prev;
3669         }
3670
3671       c = *charbuf++;
3672
3673       if (c < 0)
3674         {
3675           /* Handle an annotation.  */
3676           switch (*charbuf)
3677             {
3678             case CODING_ANNOTATE_COMPOSITION_MASK:
3679               /* Not yet implemented.  */
3680               break;
3681             case CODING_ANNOTATE_CHARSET_MASK:
3682               preferred_charset_id = charbuf[3];
3683               if (preferred_charset_id >= 0
3684                   && NILP (Fmemq (make_number (preferred_charset_id),
3685                                   charset_list)))
3686                 preferred_charset_id = -1;
3687               break;
3688             default:
3689               abort ();
3690             }
3691           charbuf += -c - 1;
3692           continue;
3693         }
3694
3695       /* Now encode the character C.  */
3696       if (c < 0x20 || c == 0x7F)
3697         {
3698           if (c == '\n'
3699               || (c == '\r' && EQ (eol_type, Qmac)))
3700             {
3701               if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_RESET_AT_EOL)
3702                 ENCODE_RESET_PLANE_AND_REGISTER ();
3703               if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_INIT_AT_BOL)
3704                 {
3705                   int i;
3706
3707                   for (i = 0; i < 4; i++)
3708                     CODING_ISO_DESIGNATION (coding, i)
3709                       = CODING_ISO_INITIAL (coding, i);
3710                 }
3711               bol_designation
3712                 = CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DESIGNATE_AT_BOL;
3713             }
3714           else if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_RESET_AT_CNTL)
3715             ENCODE_RESET_PLANE_AND_REGISTER ();
3716           EMIT_ONE_ASCII_BYTE (c);
3717         }
3718       else if (ASCII_CHAR_P (c))
3719         {
3720           if (ascii_compatible)
3721             EMIT_ONE_ASCII_BYTE (c);
3722           else
3723             {
3724               struct charset *charset = CHARSET_FROM_ID (charset_ascii);
3725               ENCODE_ISO_CHARACTER (charset, c);
3726             }
3727         }
3728       else if (CHAR_BYTE8_P (c))
3729         {
3730           c = CHAR_TO_BYTE8 (c);
3731           EMIT_ONE_BYTE (c);
3732         }
3733       else
3734         {
3735           struct charset *charset;
3736
3737           if (preferred_charset_id >= 0)
3738             {
3739               charset = CHARSET_FROM_ID (preferred_charset_id);
3740               if (! CHAR_CHARSET_P (c, charset))
3741                 charset = char_charset (c, charset_list, NULL);
3742             }
3743           else
3744             charset = char_charset (c, charset_list, NULL);
3745           if (!charset)
3746             {
3747               if (coding->mode & CODING_MODE_SAFE_ENCODING)
3748                 {
3749                   c = CODING_INHIBIT_CHARACTER_SUBSTITUTION;
3750                   charset = CHARSET_FROM_ID (charset_ascii);
3751                 }
3752               else
3753                 {
3754                   c = coding->default_char;
3755                   charset = char_charset (c, charset_list, NULL);
3756                 }
3757             }
3758           ENCODE_ISO_CHARACTER (charset, c);
3759         }
3760     }
3761
3762   if (coding->mode & CODING_MODE_LAST_BLOCK
3763       && CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_RESET_AT_EOL)
3764     {
3765       ASSURE_DESTINATION (safe_room);
3766       ENCODE_RESET_PLANE_AND_REGISTER ();
3767     }
3768   coding->result = CODING_RESULT_SUCCESS;
3769   CODING_ISO_BOL (coding) = bol_designation;
3770   coding->produced_char += produced_chars;
3771   coding->produced = dst - coding->destination;
3772   return 0;
3773 }
3774
3775 \f
3776 /*** 8,9. SJIS and BIG5 handlers ***/
3777
3778 /* Although SJIS and BIG5 are not ISO's coding system, they are used
3779    quite widely.  So, for the moment, Emacs supports them in the bare
3780    C code.  But, in the future, they may be supported only by CCL.  */
3781
3782 /* SJIS is a coding system encoding three character sets: ASCII, right
3783    half of JISX0201-Kana, and JISX0208.  An ASCII character is encoded
3784    as is.  A character of charset katakana-jisx0201 is encoded by
3785    "position-code + 0x80".  A character of charset japanese-jisx0208
3786    is encoded in 2-byte but two position-codes are divided and shifted
3787    so that it fit in the range below.
3788
3789    --- CODE RANGE of SJIS ---
3790    (character set)      (range)
3791    ASCII                0x00 .. 0x7F
3792    KATAKANA-JISX0201    0xA0 .. 0xDF
3793    JISX0208 (1st byte)  0x81 .. 0x9F and 0xE0 .. 0xEF
3794             (2nd byte)  0x40 .. 0x7E and 0x80 .. 0xFC
3795    -------------------------------
3796
3797 */
3798
3799 /* BIG5 is a coding system encoding two character sets: ASCII and
3800    Big5.  An ASCII character is encoded as is.  Big5 is a two-byte
3801    character set and is encoded in two-byte.
3802
3803    --- CODE RANGE of BIG5 ---
3804    (character set)      (range)
3805    ASCII                0x00 .. 0x7F
3806    Big5 (1st byte)      0xA1 .. 0xFE
3807         (2nd byte)      0x40 .. 0x7E and 0xA1 .. 0xFE
3808    --------------------------
3809
3810   */
3811
3812 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
3813    Check if a text is encoded in SJIS.  If it is, return
3814    CATEGORY_MASK_SJIS, else return 0.  */
3815
3816 static int
3817 detect_coding_sjis (coding, detect_info)
3818      struct coding_system *coding;
3819      struct coding_detection_info *detect_info;
3820 {
3821   unsigned char *src = coding->source, *src_base = src;
3822   unsigned char *src_end = coding->source + coding->src_bytes;
3823   int multibytep = coding->src_multibyte;
3824   int consumed_chars = 0;
3825   int found = 0;
3826   int c;
3827   int incomplete;
3828
3829   detect_info->checked |= CATEGORY_MASK_SJIS;
3830   /* A coding system of this category is always ASCII compatible.  */
3831   src += coding->head_ascii;
3832
3833   while (1)
3834     {
3835       incomplete = 0;
3836       ONE_MORE_BYTE (c);
3837       incomplete = 1;
3838       if (c < 0x80)
3839         continue;
3840       if ((c >= 0x81 && c <= 0x9F) || (c >= 0xE0 && c <= 0xEF))
3841         {
3842           ONE_MORE_BYTE (c);
3843           if (c < 0x40 || c == 0x7F || c > 0xFC)
3844             break;
3845           found = CATEGORY_MASK_SJIS;
3846         }
3847       else if (c >= 0xA0 && c < 0xE0)
3848         found = CATEGORY_MASK_SJIS;
3849       else
3850         break;
3851     }
3852   detect_info->rejected |= CATEGORY_MASK_SJIS;
3853   return 0;
3854
3855  no_more_source:
3856   if (incomplete && coding->mode & CODING_MODE_LAST_BLOCK)
3857     {
3858       detect_info->rejected |= CATEGORY_MASK_SJIS;
3859       return 0;
3860     }
3861   detect_info->found |= found;
3862   return 1;
3863 }
3864
3865 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
3866    Check if a text is encoded in BIG5.  If it is, return
3867    CATEGORY_MASK_BIG5, else return 0.  */
3868
3869 static int
3870 detect_coding_big5 (coding, detect_info)
3871      struct coding_system *coding;
3872      struct coding_detection_info *detect_info;
3873 {
3874   unsigned char *src = coding->source, *src_base = src;
3875   unsigned char *src_end = coding->source + coding->src_bytes;
3876   int multibytep = coding->src_multibyte;
3877   int consumed_chars = 0;
3878   int found = 0;
3879   int c;
3880   int incomplete;
3881
3882   detect_info->checked |= CATEGORY_MASK_BIG5;
3883   /* A coding system of this category is always ASCII compatible.  */
3884   src += coding->head_ascii;
3885
3886   while (1)
3887     {
3888       incomplete = 0;
3889       ONE_MORE_BYTE (c);
3890       incomplete = 1;
3891       if (c < 0x80)
3892         continue;
3893       if (c >= 0xA1)
3894         {
3895           ONE_MORE_BYTE (c);
3896           if (c < 0x40 || (c >= 0x7F && c <= 0xA0))
3897             return 0;
3898           found = CATEGORY_MASK_BIG5;
3899         }
3900       else
3901         break;
3902     }
3903   detect_info->rejected |= CATEGORY_MASK_BIG5;
3904   return 0;
3905
3906  no_more_source:
3907   if (incomplete && coding->mode & CODING_MODE_LAST_BLOCK)
3908     {
3909       detect_info->rejected |= CATEGORY_MASK_BIG5;
3910       return 0;
3911     }
3912   detect_info->found |= found;
3913   return 1;
3914 }
3915
3916 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".
3917    If SJIS_P is 1, decode SJIS text, else decode BIG5 test.  */
3918
3919 static void
3920 decode_coding_sjis (coding)
3921      struct coding_system *coding;
3922 {
3923   unsigned char *src = coding->source + coding->consumed;
3924   unsigned char *src_end = coding->source + coding->src_bytes;
3925   unsigned char *src_base;
3926   int *charbuf = coding->charbuf;
3927   int *charbuf_end = charbuf + coding->charbuf_size - MAX_ANNOTATION_LENGTH;
3928   int consumed_chars = 0, consumed_chars_base;
3929   int multibytep = coding->src_multibyte;
3930   struct charset *charset_roman, *charset_kanji, *charset_kana;
3931   Lisp_Object attrs, eol_type, charset_list, val;
3932   int char_offset = coding->produced_char;
3933   int last_offset = char_offset;
3934   int last_id = charset_ascii;
3935
3936   CODING_GET_INFO (coding, attrs, eol_type, charset_list);
3937
3938   val = charset_list;
3939   charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
3940   charset_kana = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
3941   charset_kanji = CHARSET_FROM_ID (XINT (XCAR (val)));
3942
3943   while (1)
3944     {
3945       int c, c1;
3946
3947       src_base = src;
3948       consumed_chars_base = consumed_chars;
3949
3950       if (charbuf >= charbuf_end)
3951         break;
3952
3953       ONE_MORE_BYTE (c);
3954
3955       if (c == '\r')
3956         {
3957           if (EQ (eol_type, Qdos))
3958             {
3959               if (src == src_end)
3960                 {
3961                   coding->result = CODING_RESULT_INSUFFICIENT_SRC;
3962                   goto no_more_source;
3963                 }
3964               if (*src == '\n')
3965                 ONE_MORE_BYTE (c);
3966             }
3967           else if (EQ (eol_type, Qmac))
3968             c = '\n';
3969         }
3970       else
3971         {
3972           struct charset *charset;
3973
3974           if (c < 0x80)
3975             charset = charset_roman;
3976           else
3977             {
3978               if (c >= 0xF0)
3979                 goto invalid_code;
3980               if (c < 0xA0 || c >= 0xE0)
3981                 {
3982                   /* SJIS -> JISX0208 */
3983                   ONE_MORE_BYTE (c1);
3984                   if (c1 < 0x40 || c1 == 0x7F || c1 > 0xFC)
3985                     goto invalid_code;
3986                   c = (c << 8) | c1;
3987                   SJIS_TO_JIS (c);
3988                   charset = charset_kanji;
3989                 }
3990               else
3991                 {
3992                   /* SJIS -> JISX0201-Kana */
3993                   c &= 0x7F;
3994                   charset = charset_kana;
3995                 }
3996             }
3997           if (charset->id != charset_ascii
3998               && last_id != charset->id)
3999             {
4000               if (last_id != charset_ascii)
4001                 ADD_CHARSET_DATA (charbuf, last_offset, char_offset, last_id);
4002               last_id = charset->id;
4003               last_offset = char_offset;
4004             }
4005           CODING_DECODE_CHAR (coding, src, src_base, src_end, charset, c, c);
4006         }
4007       *charbuf++ = c;
4008       char_offset++;
4009       continue;
4010
4011     invalid_code:
4012       src = src_base;
4013       consumed_chars = consumed_chars_base;
4014       ONE_MORE_BYTE (c);
4015       *charbuf++ = ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
4016       char_offset++;
4017       coding->errors++;
4018     }
4019
4020  no_more_source:
4021   if (last_id != charset_ascii)
4022     ADD_CHARSET_DATA (charbuf, last_offset, char_offset, last_id);
4023   coding->consumed_char += consumed_chars_base;
4024   coding->consumed = src_base - coding->source;
4025   coding->charbuf_used = charbuf - coding->charbuf;
4026 }
4027
4028 static void
4029 decode_coding_big5 (coding)
4030      struct coding_system *coding;
4031 {
4032   unsigned char *src = coding->source + coding->consumed;
4033   unsigned char *src_end = coding->source + coding->src_bytes;
4034   unsigned char *src_base;
4035   int *charbuf = coding->charbuf;
4036   int *charbuf_end = charbuf + coding->charbuf_size - MAX_ANNOTATION_LENGTH;
4037   int consumed_chars = 0, consumed_chars_base;
4038   int multibytep = coding->src_multibyte;
4039   struct charset *charset_roman, *charset_big5;
4040   Lisp_Object attrs, eol_type, charset_list, val;
4041   int char_offset = coding->produced_char;
4042   int last_offset = char_offset;
4043   int last_id = charset_ascii;
4044
4045   CODING_GET_INFO (coding, attrs, eol_type, charset_list);
4046   val = charset_list;
4047   charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
4048   charset_big5 = CHARSET_FROM_ID (XINT (XCAR (val)));
4049
4050   while (1)
4051     {
4052       int c, c1;
4053
4054       src_base = src;
4055       consumed_chars_base = consumed_chars;
4056
4057       if (charbuf >= charbuf_end)
4058         break;
4059
4060       ONE_MORE_BYTE (c);
4061
4062       if (c == '\r')
4063         {
4064           if (EQ (eol_type, Qdos))
4065             {
4066               if (src == src_end)
4067                 {
4068                   coding->result = CODING_RESULT_INSUFFICIENT_SRC;
4069                   goto no_more_source;
4070                 }
4071               if (*src == '\n')
4072                 ONE_MORE_BYTE (c);
4073             }
4074           else if (EQ (eol_type, Qmac))
4075             c = '\n';
4076         }
4077       else
4078         {
4079           struct charset *charset;
4080           if (c < 0x80)
4081             charset = charset_roman;
4082           else
4083             {
4084               /* BIG5 -> Big5 */
4085               if (c < 0xA1 || c > 0xFE)
4086                 goto invalid_code;
4087               ONE_MORE_BYTE (c1);
4088               if (c1 < 0x40 || (c1 > 0x7E && c1 < 0xA1) || c1 > 0xFE)
4089                 goto invalid_code;
4090               c = c << 8 | c1;
4091               charset = charset_big5;
4092             }
4093           if (charset->id != charset_ascii
4094               && last_id != charset->id)
4095             {
4096               if (last_id != charset_ascii)
4097                 ADD_CHARSET_DATA (charbuf, last_offset, char_offset, last_id);
4098               last_id = charset->id;
4099               last_offset = char_offset;
4100             }
4101           CODING_DECODE_CHAR (coding, src, src_base, src_end, charset, c, c);
4102         }
4103
4104       *charbuf++ = c;
4105       char_offset++;
4106       continue;
4107
4108     invalid_code:
4109       src = src_base;
4110       consumed_chars = consumed_chars_base;
4111       ONE_MORE_BYTE (c);
4112       *charbuf++ = ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
4113       char_offset++;
4114       coding->errors++;
4115     }
4116
4117  no_more_source:
4118   if (last_id != charset_ascii)
4119     ADD_CHARSET_DATA (charbuf, last_offset, char_offset, last_id);
4120   coding->consumed_char += consumed_chars_base;
4121   coding->consumed = src_base - coding->source;
4122   coding->charbuf_used = charbuf - coding->charbuf;
4123 }
4124
4125 /* See the above "GENERAL NOTES on `encode_coding_XXX ()' functions".
4126    This function can encode charsets `ascii', `katakana-jisx0201',
4127    `japanese-jisx0208', `chinese-big5-1', and `chinese-big5-2'.  We
4128    are sure that all these charsets are registered as official charset
4129    (i.e. do not have extended leading-codes).  Characters of other
4130    charsets are produced without any encoding.  If SJIS_P is 1, encode
4131    SJIS text, else encode BIG5 text.  */
4132
4133 static int
4134 encode_coding_sjis (coding)
4135      struct coding_system *coding;
4136 {
4137   int multibytep = coding->dst_multibyte;
4138   int *charbuf = coding->charbuf;
4139   int *charbuf_end = charbuf + coding->charbuf_used;
4140   unsigned char *dst = coding->destination + coding->produced;
4141   unsigned char *dst_end = coding->destination + coding->dst_bytes;
4142   int safe_room = 4;
4143   int produced_chars = 0;
4144   Lisp_Object attrs, eol_type, charset_list, val;
4145   int ascii_compatible;
4146   struct charset *charset_roman, *charset_kanji, *charset_kana;
4147   int c;
4148
4149   CODING_GET_INFO (coding, attrs, eol_type, charset_list);
4150   val = charset_list;
4151   charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
4152   charset_kana = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
4153   charset_kanji = CHARSET_FROM_ID (XINT (XCAR (val)));
4154
4155   ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
4156
4157   while (charbuf < charbuf_end)
4158     {
4159       ASSURE_DESTINATION (safe_room);
4160       c = *charbuf++;
4161       /* Now encode the character C.  */
4162       if (ASCII_CHAR_P (c) && ascii_compatible)
4163         EMIT_ONE_ASCII_BYTE (c);
4164       else if (CHAR_BYTE8_P (c))
4165         {
4166           c = CHAR_TO_BYTE8 (c);
4167           EMIT_ONE_BYTE (c);
4168         }
4169       else
4170         {
4171           unsigned code;
4172           struct charset *charset = char_charset (c, charset_list, &code);
4173
4174           if (!charset)
4175             {
4176               if (coding->mode & CODING_MODE_SAFE_ENCODING)
4177                 {
4178                   code = CODING_INHIBIT_CHARACTER_SUBSTITUTION;
4179                   charset = CHARSET_FROM_ID (charset_ascii);
4180                 }
4181               else
4182                 {
4183                   c = coding->default_char;
4184                   charset = char_charset (c, charset_list, &code);
4185                 }
4186             }
4187           if (code == CHARSET_INVALID_CODE (charset))
4188             abort ();
4189           if (charset == charset_kanji)
4190             {
4191               int c1, c2;
4192               JIS_TO_SJIS (code);
4193               c1 = code >> 8, c2 = code & 0xFF;
4194               EMIT_TWO_BYTES (c1, c2);
4195             }
4196           else if (charset == charset_kana)
4197             EMIT_ONE_BYTE (code | 0x80);
4198           else
4199             EMIT_ONE_ASCII_BYTE (code & 0x7F);
4200         }
4201     }
4202   coding->result = CODING_RESULT_SUCCESS;
4203   coding->produced_char += produced_chars;
4204   coding->produced = dst - coding->destination;
4205   return 0;
4206 }
4207
4208 static int
4209 encode_coding_big5 (coding)
4210      struct coding_system *coding;
4211 {
4212   int multibytep = coding->dst_multibyte;
4213   int *charbuf = coding->charbuf;
4214   int *charbuf_end = charbuf + coding->charbuf_used;
4215   unsigned char *dst = coding->destination + coding->produced;
4216   unsigned char *dst_end = coding->destination + coding->dst_bytes;
4217   int safe_room = 4;
4218   int produced_chars = 0;
4219   Lisp_Object attrs, eol_type, charset_list, val;
4220   int ascii_compatible;
4221   struct charset *charset_roman, *charset_big5;
4222   int c;
4223
4224   CODING_GET_INFO (coding, attrs, eol_type, charset_list);
4225   val = charset_list;
4226   charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
4227   charset_big5 = CHARSET_FROM_ID (XINT (XCAR (val)));
4228   ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
4229
4230   while (charbuf < charbuf_end)
4231     {
4232       ASSURE_DESTINATION (safe_room);
4233       c = *charbuf++;
4234       /* Now encode the character C.  */
4235       if (ASCII_CHAR_P (c) && ascii_compatible)
4236         EMIT_ONE_ASCII_BYTE (c);
4237       else if (CHAR_BYTE8_P (c))
4238         {
4239           c = CHAR_TO_BYTE8 (c);
4240           EMIT_ONE_BYTE (c);
4241         }
4242       else
4243         {
4244           unsigned code;
4245           struct charset *charset = char_charset (c, charset_list, &code);
4246
4247           if (! charset)
4248             {
4249               if (coding->mode & CODING_MODE_SAFE_ENCODING)
4250                 {
4251                   code = CODING_INHIBIT_CHARACTER_SUBSTITUTION;
4252                   charset = CHARSET_FROM_ID (charset_ascii);
4253                 }
4254               else
4255                 {
4256                   c = coding->default_char;
4257                   charset = char_charset (c, charset_list, &code);
4258                 }
4259             }
4260           if (code == CHARSET_INVALID_CODE (charset))
4261             abort ();
4262           if (charset == charset_big5)
4263             {
4264               int c1, c2;
4265
4266               c1 = code >> 8, c2 = code & 0xFF;
4267               EMIT_TWO_BYTES (c1, c2);
4268             }
4269           else
4270             EMIT_ONE_ASCII_BYTE (code & 0x7F);
4271         }
4272     }
4273   coding->result = CODING_RESULT_SUCCESS;
4274   coding->produced_char += produced_chars;
4275   coding->produced = dst - coding->destination;
4276   return 0;
4277 }
4278
4279 \f
4280 /*** 10. CCL handlers ***/
4281
4282 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
4283    Check if a text is encoded in a coding system of which
4284    encoder/decoder are written in CCL program.  If it is, return
4285    CATEGORY_MASK_CCL, else return 0.  */
4286
4287 static int
4288 detect_coding_ccl (coding, detect_info)
4289      struct coding_system *coding;
4290      struct coding_detection_info *detect_info;
4291 {
4292   unsigned char *src = coding->source, *src_base = src;
4293   unsigned char *src_end = coding->source + coding->src_bytes;
4294   int multibytep = coding->src_multibyte;
4295   int consumed_chars = 0;
4296   int found = 0;
4297   unsigned char *valids = CODING_CCL_VALIDS (coding);
4298   int head_ascii = coding->head_ascii;
4299   Lisp_Object attrs;
4300
4301   detect_info->checked |= CATEGORY_MASK_CCL;
4302
4303   coding = &coding_categories[coding_category_ccl];
4304   attrs = CODING_ID_ATTRS (coding->id);
4305   if (! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
4306     src += head_ascii;
4307
4308   while (1)
4309     {
4310       int c;
4311       ONE_MORE_BYTE (c);
4312       if (! valids[c])
4313         break;
4314       if ((valids[c] > 1))
4315         found = CATEGORY_MASK_CCL;
4316     }
4317   detect_info->rejected |= CATEGORY_MASK_CCL;
4318   return 0;
4319
4320  no_more_source:
4321   detect_info->found |= found;
4322   return 1;
4323 }
4324
4325 static void
4326 decode_coding_ccl (coding)
4327      struct coding_system *coding;
4328 {
4329   const unsigned char *src = coding->source + coding->consumed;
4330   unsigned char *src_end = coding->source + coding->src_bytes;
4331   int *charbuf = coding->charbuf;
4332   int *charbuf_end = charbuf + coding->charbuf_size;
4333   int consumed_chars = 0;
4334   int multibytep = coding->src_multibyte;
4335   struct ccl_program ccl;
4336   int source_charbuf[1024];
4337   int source_byteidx[1024];
4338   Lisp_Object attrs, eol_type, charset_list;
4339
4340   CODING_GET_INFO (coding, attrs, eol_type, charset_list);
4341   setup_ccl_program (&ccl, CODING_CCL_DECODER (coding));
4342
4343   while (src < src_end)
4344     {
4345       const unsigned char *p = src;
4346       int *source, *source_end;
4347       int i = 0;
4348
4349       if (multibytep)
4350         while (i < 1024 && p < src_end)
4351           {
4352             source_byteidx[i] = p - src;
4353             source_charbuf[i++] = STRING_CHAR_ADVANCE (p);
4354           }
4355       else
4356         while (i < 1024 && p < src_end)
4357           source_charbuf[i++] = *p++;
4358
4359       if (p == src_end && coding->mode & CODING_MODE_LAST_BLOCK)
4360         ccl.last_block = 1;
4361
4362       source = source_charbuf;
4363       source_end = source + i;
4364       while (source < source_end)
4365         {
4366           ccl_driver (&ccl, source, charbuf,
4367                       source_end - source, charbuf_end - charbuf,
4368                       charset_list);
4369           source += ccl.consumed;
4370           charbuf += ccl.produced;
4371           if (ccl.status != CCL_STAT_SUSPEND_BY_DST)
4372             break;
4373         }
4374       if (source < source_end)
4375         src += source_byteidx[source - source_charbuf];
4376       else
4377         src = p;
4378       consumed_chars += source - source_charbuf;
4379
4380       if (ccl.status != CCL_STAT_SUSPEND_BY_SRC
4381           && ccl.status != CODING_RESULT_INSUFFICIENT_SRC)
4382         break;
4383     }
4384
4385   switch (ccl.status)
4386     {
4387     case CCL_STAT_SUSPEND_BY_SRC:
4388       coding->result = CODING_RESULT_INSUFFICIENT_SRC;
4389       break;
4390     case CCL_STAT_SUSPEND_BY_DST:
4391       break;
4392     case CCL_STAT_QUIT:
4393     case CCL_STAT_INVALID_CMD:
4394       coding->result = CODING_RESULT_INTERRUPT;
4395       break;
4396     default:
4397       coding->result = CODING_RESULT_SUCCESS;
4398       break;
4399     }
4400   coding->consumed_char += consumed_chars;
4401   coding->consumed = src - coding->source;
4402   coding->charbuf_used = charbuf - coding->charbuf;
4403 }
4404
4405 static int
4406 encode_coding_ccl (coding)
4407      struct coding_system *coding;
4408 {
4409   struct ccl_program ccl;
4410   int multibytep = coding->dst_multibyte;
4411   int *charbuf = coding->charbuf;
4412   int *charbuf_end = charbuf + coding->charbuf_used;
4413   unsigned char *dst = coding->destination + coding->produced;
4414   unsigned char *dst_end = coding->destination + coding->dst_bytes;
4415   unsigned char *adjusted_dst_end = dst_end - 1;
4416   int destination_charbuf[1024];
4417   int i, produced_chars = 0;
4418   Lisp_Object attrs, eol_type, charset_list;
4419
4420   CODING_GET_INFO (coding, attrs, eol_type, charset_list);
4421   setup_ccl_program (&ccl, CODING_CCL_ENCODER (coding));
4422
4423   ccl.last_block = coding->mode & CODING_MODE_LAST_BLOCK;
4424   ccl.dst_multibyte = coding->dst_multibyte;
4425
4426   while (charbuf < charbuf_end && dst < adjusted_dst_end)
4427     {
4428       int dst_bytes = dst_end - dst;
4429       if (dst_bytes > 1024)
4430         dst_bytes = 1024;
4431
4432       ccl_driver (&ccl, charbuf, destination_charbuf,
4433                   charbuf_end - charbuf, dst_bytes, charset_list);
4434       charbuf += ccl.consumed;
4435       if (multibytep)
4436         for (i = 0; i < ccl.produced; i++)
4437           EMIT_ONE_BYTE (destination_charbuf[i] & 0xFF);
4438       else
4439         {
4440           for (i = 0; i < ccl.produced; i++)
4441             *dst++ = destination_charbuf[i] & 0xFF;
4442           produced_chars += ccl.produced;
4443         }
4444     }
4445
4446   switch (ccl.status)
4447     {
4448     case CCL_STAT_SUSPEND_BY_SRC:
4449       coding->result = CODING_RESULT_INSUFFICIENT_SRC;
4450       break;
4451     case CCL_STAT_SUSPEND_BY_DST:
4452       coding->result = CODING_RESULT_INSUFFICIENT_DST;
4453       break;
4454     case CCL_STAT_QUIT:
4455     case CCL_STAT_INVALID_CMD:
4456       coding->result = CODING_RESULT_INTERRUPT;
4457       break;
4458     default:
4459       coding->result = CODING_RESULT_SUCCESS;
4460       break;
4461     }
4462
4463   coding->produced_char += produced_chars;
4464   coding->produced = dst - coding->destination;
4465   return 0;
4466 }
4467
4468
4469 \f
4470 /*** 10, 11. no-conversion handlers ***/
4471
4472 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".  */
4473
4474 static void
4475 decode_coding_raw_text (coding)
4476      struct coding_system *coding;
4477 {
4478   coding->chars_at_source = 1;
4479   coding->consumed_char = 0;
4480   coding->consumed = 0;
4481   coding->result = CODING_RESULT_SUCCESS;
4482 }
4483
4484 static int
4485 encode_coding_raw_text (coding)
4486      struct coding_system *coding;
4487 {
4488   int multibytep = coding->dst_multibyte;
4489   int *charbuf = coding->charbuf;
4490   int *charbuf_end = coding->charbuf + coding->charbuf_used;
4491   unsigned char *dst = coding->destination + coding->produced;
4492   unsigned char *dst_end = coding->destination + coding->dst_bytes;
4493   int produced_chars = 0;
4494   int c;
4495
4496   if (multibytep)
4497     {
4498       int safe_room = MAX_MULTIBYTE_LENGTH * 2;
4499
4500       if (coding->src_multibyte)
4501         while (charbuf < charbuf_end)
4502           {
4503             ASSURE_DESTINATION (safe_room);
4504             c = *charbuf++;
4505             if (ASCII_CHAR_P (c))
4506               EMIT_ONE_ASCII_BYTE (c);
4507             else if (CHAR_BYTE8_P (c))
4508               {
4509                 c = CHAR_TO_BYTE8 (c);
4510                 EMIT_ONE_BYTE (c);
4511               }
4512             else
4513               {
4514                 unsigned char str[MAX_MULTIBYTE_LENGTH], *p0 = str, *p1 = str;
4515
4516                 CHAR_STRING_ADVANCE (c, p1);
4517                 while (p0 < p1)
4518                   {
4519                     EMIT_ONE_BYTE (*p0);
4520                     p0++;
4521                   }
4522               }
4523           }
4524       else
4525         while (charbuf < charbuf_end)
4526           {
4527             ASSURE_DESTINATION (safe_room);
4528             c = *charbuf++;
4529             EMIT_ONE_BYTE (c);
4530           }
4531     }
4532   else
4533     {
4534       if (coding->src_multibyte)
4535         {
4536           int safe_room = MAX_MULTIBYTE_LENGTH;
4537
4538           while (charbuf < charbuf_end)
4539             {
4540               ASSURE_DESTINATION (safe_room);
4541               c = *charbuf++;
4542               if (ASCII_CHAR_P (c))
4543                 *dst++ = c;
4544               else if (CHAR_BYTE8_P (c))
4545                 *dst++ = CHAR_TO_BYTE8 (c);
4546               else
4547                 CHAR_STRING_ADVANCE (c, dst);
4548               produced_chars++;
4549             }
4550         }
4551       else
4552         {
4553           ASSURE_DESTINATION (charbuf_end - charbuf);
4554           while (charbuf < charbuf_end && dst < dst_end)
4555             *dst++ = *charbuf++;
4556           produced_chars = dst - (coding->destination + coding->dst_bytes);
4557         }
4558     }
4559   coding->result = CODING_RESULT_SUCCESS;
4560   coding->produced_char += produced_chars;
4561   coding->produced = dst - coding->destination;
4562   return 0;
4563 }
4564
4565 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
4566    Check if a text is encoded in a charset-based coding system.  If it
4567    is, return 1, else return 0.  */
4568
4569 static int
4570 detect_coding_charset (coding, detect_info)
4571      struct coding_system *coding;
4572      struct coding_detection_info *detect_info;
4573 {
4574   unsigned char *src = coding->source, *src_base = src;
4575   unsigned char *src_end = coding->source + coding->src_bytes;
4576   int multibytep = coding->src_multibyte;
4577   int consumed_chars = 0;
4578   Lisp_Object attrs, valids;
4579   int found = 0;
4580
4581   detect_info->checked |= CATEGORY_MASK_CHARSET;
4582
4583   coding = &coding_categories[coding_category_charset];
4584   attrs = CODING_ID_ATTRS (coding->id);
4585   valids = AREF (attrs, coding_attr_charset_valids);
4586
4587   if (! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
4588     src += coding->head_ascii;
4589
4590   while (1)
4591     {
4592       int c;
4593
4594       ONE_MORE_BYTE (c);
4595       if (NILP (AREF (valids, c)))
4596         break;
4597       if (c >= 0x80)
4598         found = CATEGORY_MASK_CHARSET;
4599     }
4600   detect_info->rejected |= CATEGORY_MASK_CHARSET;
4601   return 0;
4602
4603  no_more_source:
4604   detect_info->found |= found;
4605   return 1;
4606 }
4607
4608 static void
4609 decode_coding_charset (coding)
4610      struct coding_system *coding;
4611 {
4612   unsigned char *src = coding->source + coding->consumed;
4613   unsigned char *src_end = coding->source + coding->src_bytes;
4614   unsigned char *src_base;
4615   int *charbuf = coding->charbuf;
4616   int *charbuf_end = charbuf + coding->charbuf_size - MAX_ANNOTATION_LENGTH;
4617   int consumed_chars = 0, consumed_chars_base;
4618   int multibytep = coding->src_multibyte;
4619   Lisp_Object attrs, eol_type, charset_list, valids;
4620   int char_offset = coding->produced_char;
4621   int last_offset = char_offset;
4622   int last_id = charset_ascii;
4623
4624   CODING_GET_INFO (coding, attrs, eol_type, charset_list);
4625   valids = AREF (attrs, coding_attr_charset_valids);
4626
4627   while (1)
4628     {
4629       int c;
4630
4631       src_base = src;
4632       consumed_chars_base = consumed_chars;
4633
4634       if (charbuf >= charbuf_end)
4635         break;
4636
4637       ONE_MORE_BYTE (c);
4638       if (c == '\r')
4639         {
4640           /* Here we assume that no charset maps '\r' to something
4641              else.  */
4642           if (EQ (eol_type, Qdos))
4643             {
4644               if (src == src_end)
4645                 {
4646                   coding->result = CODING_RESULT_INSUFFICIENT_SRC;
4647                   goto no_more_source;
4648                 }
4649               if (*src == '\n')
4650                 ONE_MORE_BYTE (c);
4651             }
4652           else if (EQ (eol_type, Qmac))
4653             c = '\n';
4654         }
4655       else
4656         {
4657           Lisp_Object val;
4658           struct charset *charset;
4659           int dim;
4660           int len = 1;
4661           unsigned code = c;
4662
4663           val = AREF (valids, c);
4664           if (NILP (val))
4665             goto invalid_code;
4666           if (INTEGERP (val))
4667             {
4668               charset = CHARSET_FROM_ID (XFASTINT (val));
4669               dim = CHARSET_DIMENSION (charset);
4670               while (len < dim)
4671                 {
4672                   ONE_MORE_BYTE (c);
4673                   code = (code << 8) | c;
4674                   len++;
4675                 }
4676               CODING_DECODE_CHAR (coding, src, src_base, src_end,
4677                                   charset, code, c);
4678             }
4679           else
4680             {
4681               /* VAL is a list of charset IDs.  It is assured that the
4682                  list is sorted by charset dimensions (smaller one
4683                  comes first).  */
4684               while (CONSP (val))
4685                 {
4686                   charset = CHARSET_FROM_ID (XFASTINT (XCAR (val)));
4687                   dim = CHARSET_DIMENSION (charset);
4688                   while (len < dim)
4689                     {
4690                       ONE_MORE_BYTE (c);
4691                       code = (code << 8) | c;
4692                       len++;
4693                     }
4694                   CODING_DECODE_CHAR (coding, src, src_base,
4695                                       src_end, charset, code, c);
4696                   if (c >= 0)
4697                     break;
4698                   val = XCDR (val);
4699                 }
4700             }
4701           if (c < 0)
4702             goto invalid_code;
4703           if (charset->id != charset_ascii
4704               && last_id != charset->id)
4705             {
4706               if (last_id != charset_ascii)
4707                 ADD_CHARSET_DATA (charbuf, last_offset, char_offset, last_id);
4708               last_id = charset->id;
4709               last_offset = char_offset;
4710             }
4711         }
4712       *charbuf++ = c;
4713       char_offset++;
4714       continue;
4715
4716     invalid_code:
4717       src = src_base;
4718       consumed_chars = consumed_chars_base;
4719       ONE_MORE_BYTE (c);
4720       *charbuf++ = ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
4721       char_offset++;
4722       coding->errors++;
4723     }
4724
4725  no_more_source:
4726   if (last_id != charset_ascii)
4727     ADD_CHARSET_DATA (charbuf, last_offset, char_offset, last_id);
4728   coding->consumed_char += consumed_chars_base;
4729   coding->consumed = src_base - coding->source;
4730   coding->charbuf_used = charbuf - coding->charbuf;
4731 }
4732
4733 static int
4734 encode_coding_charset (coding)
4735      struct coding_system *coding;
4736 {
4737   int multibytep = coding->dst_multibyte;
4738   int *charbuf = coding->charbuf;
4739   int *charbuf_end = charbuf + coding->charbuf_used;
4740   unsigned char *dst = coding->destination + coding->produced;
4741   unsigned char *dst_end = coding->destination + coding->dst_bytes;
4742   int safe_room = MAX_MULTIBYTE_LENGTH;
4743   int produced_chars = 0;
4744   Lisp_Object attrs, eol_type, charset_list;
4745   int ascii_compatible;
4746   int c;
4747
4748   CODING_GET_INFO (coding, attrs, eol_type, charset_list);
4749   ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
4750
4751   while (charbuf < charbuf_end)
4752     {
4753       struct charset *charset;
4754       unsigned code;
4755
4756       ASSURE_DESTINATION (safe_room);
4757       c = *charbuf++;
4758       if (ascii_compatible && ASCII_CHAR_P (c))
4759         EMIT_ONE_ASCII_BYTE (c);
4760       else if (CHAR_BYTE8_P (c))
4761         {
4762           c = CHAR_TO_BYTE8 (c);
4763           EMIT_ONE_BYTE (c);
4764         }
4765       else
4766         {
4767           charset = char_charset (c, charset_list, &code);
4768           if (charset)
4769             {
4770               if (CHARSET_DIMENSION (charset) == 1)
4771                 EMIT_ONE_BYTE (code);
4772               else if (CHARSET_DIMENSION (charset) == 2)
4773                 EMIT_TWO_BYTES (code >> 8, code & 0xFF);
4774               else if (CHARSET_DIMENSION (charset) == 3)
4775                 EMIT_THREE_BYTES (code >> 16, (code >> 8) & 0xFF, code & 0xFF);
4776               else
4777                 EMIT_FOUR_BYTES (code >> 24, (code >> 16) & 0xFF,
4778                                  (code >> 8) & 0xFF, code & 0xFF);
4779             }
4780           else
4781             {
4782               if (coding->mode & CODING_MODE_SAFE_ENCODING)
4783                 c = CODING_INHIBIT_CHARACTER_SUBSTITUTION;
4784               else
4785                 c = coding->default_char;
4786               EMIT_ONE_BYTE (c);
4787             }
4788         }
4789     }
4790
4791   coding->result = CODING_RESULT_SUCCESS;
4792   coding->produced_char += produced_chars;
4793   coding->produced = dst - coding->destination;
4794   return 0;
4795 }
4796
4797 \f
4798 /*** 7. C library functions ***/
4799
4800 /* Setup coding context CODING from information about CODING_SYSTEM.
4801    If CODING_SYSTEM is nil, `no-conversion' is assumed.  If
4802    CODING_SYSTEM is invalid, signal an error.  */
4803
4804 void
4805 setup_coding_system (coding_system, coding)
4806      Lisp_Object coding_system;
4807      struct coding_system *coding;
4808 {
4809   Lisp_Object attrs;
4810   Lisp_Object eol_type;
4811   Lisp_Object coding_type;
4812   Lisp_Object val;
4813
4814   if (NILP (coding_system))
4815     coding_system = Qno_conversion;
4816
4817   CHECK_CODING_SYSTEM_GET_ID (coding_system, coding->id);
4818
4819   attrs = CODING_ID_ATTRS (coding->id);
4820   eol_type = CODING_ID_EOL_TYPE (coding->id);
4821
4822   coding->mode = 0;
4823   coding->head_ascii = -1;
4824   coding->common_flags
4825     = (VECTORP (eol_type) ? CODING_REQUIRE_DETECTION_MASK : 0);
4826   if (! NILP (CODING_ATTR_POST_READ (attrs)))
4827     coding->common_flags |= CODING_REQUIRE_DECODING_MASK;
4828   if (! NILP (CODING_ATTR_PRE_WRITE (attrs)))
4829     coding->common_flags |= CODING_REQUIRE_ENCODING_MASK;
4830
4831   val = CODING_ATTR_SAFE_CHARSETS (attrs);
4832   coding->max_charset_id = XSTRING (val)->size - 1;
4833   coding->safe_charsets = (char *) XSTRING (val)->data;
4834   coding->default_char = XINT (CODING_ATTR_DEFAULT_CHAR (attrs));
4835
4836   coding_type = CODING_ATTR_TYPE (attrs);
4837   if (EQ (coding_type, Qundecided))
4838     {
4839       coding->detector = NULL;
4840       coding->decoder = decode_coding_raw_text;
4841       coding->encoder = encode_coding_raw_text;
4842       coding->common_flags |= CODING_REQUIRE_DETECTION_MASK;
4843     }
4844   else if (EQ (coding_type, Qiso_2022))
4845     {
4846       int i;
4847       int flags = XINT (AREF (attrs, coding_attr_iso_flags));
4848
4849       /* Invoke graphic register 0 to plane 0.  */
4850       CODING_ISO_INVOCATION (coding, 0) = 0;
4851       /* Invoke graphic register 1 to plane 1 if we can use 8-bit.  */
4852       CODING_ISO_INVOCATION (coding, 1)
4853         = (flags & CODING_ISO_FLAG_SEVEN_BITS ? -1 : 1);
4854       /* Setup the initial status of designation.  */
4855       for (i = 0; i < 4; i++)
4856         CODING_ISO_DESIGNATION (coding, i) = CODING_ISO_INITIAL (coding, i);
4857       /* Not single shifting initially.  */
4858       CODING_ISO_SINGLE_SHIFTING (coding) = 0;
4859       /* Beginning of buffer should also be regarded as bol. */
4860       CODING_ISO_BOL (coding) = 1;
4861       coding->detector = detect_coding_iso_2022;
4862       coding->decoder = decode_coding_iso_2022;
4863       coding->encoder = encode_coding_iso_2022;
4864       if (flags & CODING_ISO_FLAG_SAFE)
4865         coding->mode |= CODING_MODE_SAFE_ENCODING;
4866       coding->common_flags
4867         |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK
4868             | CODING_REQUIRE_FLUSHING_MASK);
4869       if (flags & CODING_ISO_FLAG_COMPOSITION)
4870         coding->common_flags |= CODING_ANNOTATE_COMPOSITION_MASK;
4871       if (flags & CODING_ISO_FLAG_DESIGNATION)
4872         coding->common_flags |= CODING_ANNOTATE_CHARSET_MASK;
4873       if (flags & CODING_ISO_FLAG_FULL_SUPPORT)
4874         {
4875           setup_iso_safe_charsets (attrs);
4876           val = CODING_ATTR_SAFE_CHARSETS (attrs);
4877           coding->max_charset_id = XSTRING (val)->size - 1;
4878           coding->safe_charsets = (char *) XSTRING (val)->data;
4879         }
4880       CODING_ISO_FLAGS (coding) = flags;
4881     }
4882   else if (EQ (coding_type, Qcharset))
4883     {
4884       coding->detector = detect_coding_charset;
4885       coding->decoder = decode_coding_charset;
4886       coding->encoder = encode_coding_charset;
4887       coding->common_flags
4888         |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
4889     }
4890   else if (EQ (coding_type, Qutf_8))
4891     {
4892       coding->detector = detect_coding_utf_8;
4893       coding->decoder = decode_coding_utf_8;
4894       coding->encoder = encode_coding_utf_8;
4895       coding->common_flags
4896         |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
4897     }
4898   else if (EQ (coding_type, Qutf_16))
4899     {
4900       val = AREF (attrs, coding_attr_utf_16_bom);
4901       CODING_UTF_16_BOM (coding) = (CONSP (val) ? utf_16_detect_bom
4902                                     : EQ (val, Qt) ? utf_16_with_bom
4903                                     : utf_16_without_bom);
4904       val = AREF (attrs, coding_attr_utf_16_endian);
4905       CODING_UTF_16_ENDIAN (coding) = (EQ (val, Qbig) ? utf_16_big_endian
4906                                        : utf_16_little_endian);
4907       CODING_UTF_16_SURROGATE (coding) = 0;
4908       coding->detector = detect_coding_utf_16;
4909       coding->decoder = decode_coding_utf_16;
4910       coding->encoder = encode_coding_utf_16;
4911       coding->common_flags
4912         |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
4913       if (CODING_UTF_16_BOM (coding) == utf_16_detect_bom)
4914         coding->common_flags |= CODING_REQUIRE_DETECTION_MASK;
4915     }
4916   else if (EQ (coding_type, Qccl))
4917     {
4918       coding->detector = detect_coding_ccl;
4919       coding->decoder = decode_coding_ccl;
4920       coding->encoder = encode_coding_ccl;
4921       coding->common_flags
4922         |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK
4923             | CODING_REQUIRE_FLUSHING_MASK);
4924     }
4925   else if (EQ (coding_type, Qemacs_mule))
4926     {
4927       coding->detector = detect_coding_emacs_mule;
4928       coding->decoder = decode_coding_emacs_mule;
4929       coding->encoder = encode_coding_emacs_mule;
4930       coding->common_flags
4931         |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
4932       if (! NILP (AREF (attrs, coding_attr_emacs_mule_full))
4933           && ! EQ (CODING_ATTR_CHARSET_LIST (attrs), Vemacs_mule_charset_list))
4934         {
4935           Lisp_Object tail, safe_charsets;
4936           int max_charset_id = 0;
4937
4938           for (tail = Vemacs_mule_charset_list; CONSP (tail);
4939                tail = XCDR (tail))
4940             if (max_charset_id < XFASTINT (XCAR (tail)))
4941               max_charset_id = XFASTINT (XCAR (tail));
4942           safe_charsets = Fmake_string (make_number (max_charset_id + 1),
4943                                         make_number (255));
4944           for (tail = Vemacs_mule_charset_list; CONSP (tail);
4945                tail = XCDR (tail))
4946             XSTRING (safe_charsets)->data[XFASTINT (XCAR (tail))] = 0;
4947           coding->max_charset_id = max_charset_id;
4948           coding->safe_charsets = (char *) XSTRING (safe_charsets)->data;
4949         }
4950     }
4951   else if (EQ (coding_type, Qshift_jis))
4952     {
4953       coding->detector = detect_coding_sjis;
4954       coding->decoder = decode_coding_sjis;
4955       coding->encoder = encode_coding_sjis;
4956       coding->common_flags
4957         |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
4958     }
4959   else if (EQ (coding_type, Qbig5))
4960     {
4961       coding->detector = detect_coding_big5;
4962       coding->decoder = decode_coding_big5;
4963       coding->encoder = encode_coding_big5;
4964       coding->common_flags
4965         |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
4966     }
4967   else                          /* EQ (coding_type, Qraw_text) */
4968     {
4969       coding->detector = NULL;
4970       coding->decoder = decode_coding_raw_text;
4971       coding->encoder = encode_coding_raw_text;
4972       coding->common_flags |= CODING_FOR_UNIBYTE_MASK;
4973     }
4974
4975   return;
4976 }
4977
4978 /* Return raw-text or one of its subsidiaries that has the same
4979    eol_type as CODING-SYSTEM.  */
4980
4981 Lisp_Object
4982 raw_text_coding_system (coding_system)
4983      Lisp_Object coding_system;
4984 {
4985   Lisp_Object spec, attrs;
4986   Lisp_Object eol_type, raw_text_eol_type;
4987
4988   if (NILP (coding_system))
4989     return Qraw_text;
4990   spec = CODING_SYSTEM_SPEC (coding_system);
4991   attrs = AREF (spec, 0);
4992
4993   if (EQ (CODING_ATTR_TYPE (attrs), Qraw_text))
4994     return coding_system;
4995
4996   eol_type = AREF (spec, 2);
4997   if (VECTORP (eol_type))
4998     return Qraw_text;
4999   spec = CODING_SYSTEM_SPEC (Qraw_text);
5000   raw_text_eol_type = AREF (spec, 2);
5001   return (EQ (eol_type, Qunix) ? AREF (raw_text_eol_type, 0)
5002           : EQ (eol_type, Qdos) ? AREF (raw_text_eol_type, 1)
5003           : AREF (raw_text_eol_type, 2));
5004 }
5005
5006
5007 /* If CODING_SYSTEM doesn't specify end-of-line format but PARENT
5008    does, return one of the subsidiary that has the same eol-spec as
5009    PARENT.  Otherwise, return CODING_SYSTEM.  */
5010
5011 Lisp_Object
5012 coding_inherit_eol_type (coding_system, parent)
5013      Lisp_Object coding_system, parent;
5014 {
5015   Lisp_Object spec, attrs, eol_type;
5016
5017   if (NILP (coding_system))
5018     coding_system = Qraw_text;
5019   spec = CODING_SYSTEM_SPEC (coding_system);
5020   attrs = AREF (spec, 0);
5021   eol_type = AREF (spec, 2);
5022   if (VECTORP (eol_type)
5023       && ! NILP (parent))
5024     {
5025       Lisp_Object parent_spec;
5026       Lisp_Object parent_eol_type;
5027
5028       parent_spec
5029         = CODING_SYSTEM_SPEC (buffer_defaults.buffer_file_coding_system);
5030       parent_eol_type = AREF (parent_spec, 2);
5031       if (EQ (parent_eol_type, Qunix))
5032         coding_system = AREF (eol_type, 0);
5033       else if (EQ (parent_eol_type, Qdos))
5034         coding_system = AREF (eol_type, 1);
5035       else if (EQ (parent_eol_type, Qmac))
5036         coding_system = AREF (eol_type, 2);
5037     }
5038   return coding_system;
5039 }
5040
5041 /* Emacs has a mechanism to automatically detect a coding system if it
5042    is one of Emacs' internal format, ISO2022, SJIS, and BIG5.  But,
5043    it's impossible to distinguish some coding systems accurately
5044    because they use the same range of codes.  So, at first, coding
5045    systems are categorized into 7, those are:
5046
5047    o coding-category-emacs-mule
5048
5049         The category for a coding system which has the same code range
5050         as Emacs' internal format.  Assigned the coding-system (Lisp
5051         symbol) `emacs-mule' by default.
5052
5053    o coding-category-sjis
5054
5055         The category for a coding system which has the same code range
5056         as SJIS.  Assigned the coding-system (Lisp
5057         symbol) `japanese-shift-jis' by default.
5058
5059    o coding-category-iso-7
5060
5061         The category for a coding system which has the same code range
5062         as ISO2022 of 7-bit environment.  This doesn't use any locking
5063         shift and single shift functions.  This can encode/decode all
5064         charsets.  Assigned the coding-system (Lisp symbol)
5065         `iso-2022-7bit' by default.
5066
5067    o coding-category-iso-7-tight
5068
5069         Same as coding-category-iso-7 except that this can
5070         encode/decode only the specified charsets.
5071
5072    o coding-category-iso-8-1
5073
5074         The category for a coding system which has the same code range
5075         as ISO2022 of 8-bit environment and graphic plane 1 used only
5076         for DIMENSION1 charset.  This doesn't use any locking shift
5077         and single shift functions.  Assigned the coding-system (Lisp
5078         symbol) `iso-latin-1' by default.
5079
5080    o coding-category-iso-8-2
5081
5082         The category for a coding system which has the same code range
5083         as ISO2022 of 8-bit environment and graphic plane 1 used only
5084         for DIMENSION2 charset.  This doesn't use any locking shift
5085         and single shift functions.  Assigned the coding-system (Lisp
5086         symbol) `japanese-iso-8bit' by default.
5087
5088    o coding-category-iso-7-else
5089
5090         The category for a coding system which has the same code range
5091         as ISO2022 of 7-bit environemnt but uses locking shift or
5092         single shift functions.  Assigned the coding-system (Lisp
5093         symbol) `iso-2022-7bit-lock' by default.
5094
5095    o coding-category-iso-8-else
5096
5097         The category for a coding system which has the same code range
5098         as ISO2022 of 8-bit environemnt but uses locking shift or
5099         single shift functions.  Assigned the coding-system (Lisp
5100         symbol) `iso-2022-8bit-ss2' by default.
5101
5102    o coding-category-big5
5103
5104         The category for a coding system which has the same code range
5105         as BIG5.  Assigned the coding-system (Lisp symbol)
5106         `cn-big5' by default.
5107
5108    o coding-category-utf-8
5109
5110         The category for a coding system which has the same code range
5111         as UTF-8 (cf. RFC2279).  Assigned the coding-system (Lisp
5112         symbol) `utf-8' by default.
5113
5114    o coding-category-utf-16-be
5115
5116         The category for a coding system in which a text has an
5117         Unicode signature (cf. Unicode Standard) in the order of BIG
5118         endian at the head.  Assigned the coding-system (Lisp symbol)
5119         `utf-16-be' by default.
5120
5121    o coding-category-utf-16-le
5122
5123         The category for a coding system in which a text has an
5124         Unicode signature (cf. Unicode Standard) in the order of
5125         LITTLE endian at the head.  Assigned the coding-system (Lisp
5126         symbol) `utf-16-le' by default.
5127
5128    o coding-category-ccl
5129
5130         The category for a coding system of which encoder/decoder is
5131         written in CCL programs.  The default value is nil, i.e., no
5132         coding system is assigned.
5133
5134    o coding-category-binary
5135
5136         The category for a coding system not categorized in any of the
5137         above.  Assigned the coding-system (Lisp symbol)
5138         `no-conversion' by default.
5139
5140    Each of them is a Lisp symbol and the value is an actual
5141    `coding-system's (this is also a Lisp symbol) assigned by a user.
5142    What Emacs does actually is to detect a category of coding system.
5143    Then, it uses a `coding-system' assigned to it.  If Emacs can't
5144    decide only one possible category, it selects a category of the
5145    highest priority.  Priorities of categories are also specified by a
5146    user in a Lisp variable `coding-category-list'.
5147
5148 */
5149
5150 #define EOL_SEEN_NONE   0
5151 #define EOL_SEEN_LF     1
5152 #define EOL_SEEN_CR     2
5153 #define EOL_SEEN_CRLF   4
5154
5155 /* Detect how end-of-line of a text of length SRC_BYTES pointed by
5156    SOURCE is encoded.  If CATEGORY is one of
5157    coding_category_utf_16_XXXX, assume that CR and LF are encoded by
5158    two-byte, else they are encoded by one-byte.
5159
5160    Return one of EOL_SEEN_XXX.  */
5161
5162 #define MAX_EOL_CHECK_COUNT 3
5163
5164 static int
5165 detect_eol (source, src_bytes, category)
5166      unsigned char *source;
5167      EMACS_INT src_bytes;
5168      enum coding_category category;
5169 {
5170   unsigned char *src = source, *src_end = src + src_bytes;
5171   unsigned char c;
5172   int total  = 0;
5173   int eol_seen = EOL_SEEN_NONE;
5174
5175   if ((1 << category) & CATEGORY_MASK_UTF_16)
5176     {
5177       int msb, lsb;
5178
5179       msb = category == (coding_category_utf_16_le
5180                          | coding_category_utf_16_le_nosig);
5181       lsb = 1 - msb;
5182
5183       while (src + 1 < src_end)
5184         {
5185           c = src[lsb];
5186           if (src[msb] == 0 && (c == '\n' || c == '\r'))
5187             {
5188               int this_eol;
5189
5190               if (c == '\n')
5191                 this_eol = EOL_SEEN_LF;
5192               else if (src + 3 >= src_end
5193                        || src[msb + 2] != 0
5194                        || src[lsb + 2] != '\n')
5195                 this_eol = EOL_SEEN_CR;
5196               else
5197                 this_eol = EOL_SEEN_CRLF;
5198
5199               if (eol_seen == EOL_SEEN_NONE)
5200                 /* This is the first end-of-line.  */
5201                 eol_seen = this_eol;
5202               else if (eol_seen != this_eol)
5203                 {
5204                   /* The found type is different from what found before.  */
5205                   eol_seen = EOL_SEEN_LF;
5206                   break;
5207                 }
5208               if (++total == MAX_EOL_CHECK_COUNT)
5209                 break;
5210             }
5211           src += 2;
5212         }
5213     }
5214   else
5215     {
5216       while (src < src_end)
5217         {
5218           c = *src++;
5219           if (c == '\n' || c == '\r')
5220             {
5221               int this_eol;
5222
5223               if (c == '\n')
5224                 this_eol = EOL_SEEN_LF;
5225               else if (src >= src_end || *src != '\n')
5226                 this_eol = EOL_SEEN_CR;
5227               else
5228                 this_eol = EOL_SEEN_CRLF, src++;
5229
5230               if (eol_seen == EOL_SEEN_NONE)
5231                 /* This is the first end-of-line.  */
5232                 eol_seen = this_eol;
5233               else if (eol_seen != this_eol)
5234                 {
5235                   /* The found type is different from what found before.  */
5236                   eol_seen = EOL_SEEN_LF;
5237                   break;
5238                 }
5239               if (++total == MAX_EOL_CHECK_COUNT)
5240                 break;
5241             }
5242         }
5243     }
5244   return eol_seen;
5245 }
5246
5247
5248 static void
5249 adjust_coding_eol_type (coding, eol_seen)
5250      struct coding_system *coding;
5251      int eol_seen;
5252 {
5253   Lisp_Object eol_type;
5254
5255   eol_type = CODING_ID_EOL_TYPE (coding->id);
5256   if (eol_seen & EOL_SEEN_LF)
5257     coding->id = CODING_SYSTEM_ID (AREF (eol_type, 0));
5258   else if (eol_seen & EOL_SEEN_CRLF)
5259     coding->id = CODING_SYSTEM_ID (AREF (eol_type, 1));
5260   else if (eol_seen & EOL_SEEN_CR)
5261     coding->id = CODING_SYSTEM_ID (AREF (eol_type, 2));
5262 }
5263
5264 /* Detect how a text specified in CODING is encoded.  If a coding
5265    system is detected, update fields of CODING by the detected coding
5266    system.  */
5267
5268 void
5269 detect_coding (coding)
5270      struct coding_system *coding;
5271 {
5272   unsigned char *src, *src_end;
5273   Lisp_Object attrs, coding_type;
5274
5275   coding->consumed = coding->consumed_char = 0;
5276   coding->produced = coding->produced_char = 0;
5277   coding_set_source (coding);
5278
5279   src_end = coding->source + coding->src_bytes;
5280
5281   /* If we have not yet decided the text encoding type, detect it
5282      now.  */
5283   if (EQ (CODING_ATTR_TYPE (CODING_ID_ATTRS (coding->id)), Qundecided))
5284     {
5285       int c, i;
5286
5287       for (src = coding->source; src < src_end; src++)
5288         {
5289           c = *src;
5290           if (c & 0x80 || (c < 0x20 && (c == ISO_CODE_ESC
5291                                         || c == ISO_CODE_SI
5292                                         || c == ISO_CODE_SO)))
5293             break;
5294         }
5295       coding->head_ascii = src - (coding->source + coding->consumed);
5296
5297       if (coding->head_ascii < coding->src_bytes)
5298         {
5299           struct coding_detection_info detect_info;
5300           enum coding_category category;
5301           struct coding_system *this;
5302
5303           detect_info.checked = detect_info.found = detect_info.rejected = 0;
5304           for (i = 0; i < coding_category_raw_text; i++)
5305             {
5306               category = coding_priorities[i];
5307               this = coding_categories + category;
5308               if (this->id < 0)
5309                 {
5310                   /* No coding system of this category is defined.  */
5311                   detect_info.rejected |= (1 << category);
5312                 }
5313               else if (category >= coding_category_raw_text)
5314                 continue;
5315               else if (detect_info.checked & (1 << category))
5316                 {
5317                   if (detect_info.found & (1 << category))
5318                     break;
5319                 }
5320               else if ((*(this->detector)) (coding, &detect_info)
5321                        && detect_info.found & (1 << category))
5322                 break;
5323             }
5324           if (i < coding_category_raw_text)
5325             setup_coding_system (CODING_ID_NAME (this->id), coding);
5326           else if (detect_info.rejected == CATEGORY_MASK_ANY)
5327             setup_coding_system (Qraw_text, coding);
5328           else if (detect_info.rejected)
5329             for (i = 0; i < coding_category_raw_text; i++)
5330               if (! (detect_info.rejected & (1 << coding_priorities[i])))
5331                 {
5332                   this = coding_categories + coding_priorities[i];
5333                   setup_coding_system (CODING_ID_NAME (this->id), coding);
5334                   break;
5335                 }
5336         }
5337     }
5338   else if (EQ (CODING_ATTR_TYPE (CODING_ID_ATTRS (coding->id)), Qutf_16))
5339     {
5340       Lisp_Object coding_systems;
5341       struct coding_detection_info detect_info;
5342
5343       coding_systems
5344         = AREF (CODING_ID_ATTRS (coding->id), coding_attr_utf_16_bom);
5345       detect_info.found = detect_info.rejected = 0;
5346       if (CONSP (coding_systems)
5347           && detect_coding_utf_16 (coding, &detect_info)
5348           && (detect_info.found & (CATEGORY_MASK_UTF_16_LE
5349                                    | CATEGORY_MASK_UTF_16_BE)))
5350         {
5351           if (detect_info.found & CATEGORY_MASK_UTF_16_LE)
5352             setup_coding_system (XCAR (coding_systems), coding);
5353           else
5354             setup_coding_system (XCDR (coding_systems), coding);
5355         }
5356     }
5357
5358   attrs = CODING_ID_ATTRS (coding->id);
5359   coding_type = CODING_ATTR_TYPE (attrs);
5360
5361   /* If we have not yet decided the EOL type, detect it now.  But, the
5362      detection is impossible for a CCL based coding system, in which
5363      case, we detct the EOL type after decoding.  */
5364   if (VECTORP (CODING_ID_EOL_TYPE (coding->id))
5365       && ! EQ (coding_type, Qccl))
5366     {
5367       int eol_seen = detect_eol (coding->source, coding->src_bytes,
5368                                  XINT (CODING_ATTR_CATEGORY (attrs)));
5369
5370       if (eol_seen != EOL_SEEN_NONE)
5371         adjust_coding_eol_type (coding, eol_seen);
5372     }
5373 }
5374
5375
5376 static void
5377 decode_eol (coding)
5378      struct coding_system *coding;
5379 {
5380   if (VECTORP (CODING_ID_EOL_TYPE (coding->id)))
5381     {
5382       unsigned char *p = CHAR_POS_ADDR (coding->dst_pos);
5383       unsigned char *pend = p + coding->produced;
5384       int eol_seen = EOL_SEEN_NONE;
5385
5386       for (; p < pend; p++)
5387         {
5388           if (*p == '\n')
5389             eol_seen |= EOL_SEEN_LF;
5390           else if (*p == '\r')
5391             {
5392               if (p + 1 < pend && *(p + 1) == '\n')
5393                 {
5394                   eol_seen |= EOL_SEEN_CRLF;
5395                   p++;
5396                 }
5397               else
5398                 eol_seen |= EOL_SEEN_CR;
5399             }
5400         }
5401       if (eol_seen != EOL_SEEN_NONE)
5402         adjust_coding_eol_type (coding, eol_seen);
5403     }
5404
5405   if (EQ (CODING_ID_EOL_TYPE (coding->id), Qmac))
5406     {
5407       unsigned char *p = CHAR_POS_ADDR (coding->dst_pos);
5408       unsigned char *pend = p + coding->produced;
5409
5410       for (; p < pend; p++)
5411         if (*p == '\r')
5412           *p = '\n';
5413     }
5414   else if (EQ (CODING_ID_EOL_TYPE (coding->id), Qdos))
5415     {
5416       unsigned char *p, *pbeg, *pend;
5417       Lisp_Object undo_list;
5418
5419       move_gap_both (coding->dst_pos + coding->produced_char,
5420                      coding->dst_pos_byte + coding->produced);
5421       undo_list = current_buffer->undo_list;
5422       current_buffer->undo_list = Qt;
5423       del_range_2 (coding->dst_pos, coding->dst_pos_byte, GPT, GPT_BYTE, 0);
5424       current_buffer->undo_list = undo_list;
5425       pbeg = GPT_ADDR;
5426       pend = pbeg + coding->produced;
5427
5428       for (p = pend - 1; p >= pbeg; p--)
5429         if (*p == '\r')
5430           {
5431             safe_bcopy ((char *) (p + 1), (char *) p, pend - p - 1);
5432             pend--;
5433           }
5434       coding->produced_char -= coding->produced - (pend - pbeg);
5435       coding->produced = pend - pbeg;
5436       insert_from_gap (coding->produced_char, coding->produced);
5437     }
5438 }
5439
5440 static void
5441 translate_chars (coding, table)
5442      struct coding_system *coding;
5443      Lisp_Object table;
5444 {
5445   int *charbuf = coding->charbuf;
5446   int *charbuf_end = charbuf + coding->charbuf_used;
5447   int c;
5448
5449   if (coding->chars_at_source)
5450     return;
5451
5452   while (charbuf < charbuf_end)
5453     {
5454       c = *charbuf;
5455       if (c < 0)
5456         charbuf += c;
5457       else
5458         *charbuf++ = translate_char (table, c);
5459     }
5460 }
5461
5462 static int
5463 produce_chars (coding)
5464      struct coding_system *coding;
5465 {
5466   unsigned char *dst = coding->destination + coding->produced;
5467   unsigned char *dst_end = coding->destination + coding->dst_bytes;
5468   int produced;
5469   int produced_chars = 0;
5470
5471   if (! coding->chars_at_source)
5472     {
5473       /* Characters are in coding->charbuf.  */
5474       int *buf = coding->charbuf;
5475       int *buf_end = buf + coding->charbuf_used;
5476       unsigned char *adjusted_dst_end;
5477
5478       if (BUFFERP (coding->src_object)
5479           && EQ (coding->src_object, coding->dst_object))
5480         dst_end = coding->source + coding->consumed;
5481       adjusted_dst_end = dst_end - MAX_MULTIBYTE_LENGTH;
5482
5483       while (buf < buf_end)
5484         {
5485           int c = *buf++;
5486
5487           if (dst >= adjusted_dst_end)
5488             {
5489               dst = alloc_destination (coding,
5490                                        buf_end - buf + MAX_MULTIBYTE_LENGTH,
5491                                        dst);
5492               dst_end = coding->destination + coding->dst_bytes;
5493               adjusted_dst_end = dst_end - MAX_MULTIBYTE_LENGTH;
5494             }
5495           if (c >= 0)
5496             {
5497               if (coding->dst_multibyte
5498                   || ! CHAR_BYTE8_P (c))
5499                 CHAR_STRING_ADVANCE (c, dst);
5500               else
5501                 *dst++ = CHAR_TO_BYTE8 (c);
5502               produced_chars++;
5503             }
5504           else
5505             /* This is an annotation datum.  (-C) is the length of
5506                it.  */
5507             buf += -c - 1;
5508         }
5509     }
5510   else
5511     {
5512       unsigned char *src = coding->source;
5513       unsigned char *src_end = src + coding->src_bytes;
5514       Lisp_Object eol_type;
5515
5516       eol_type = CODING_ID_EOL_TYPE (coding->id);
5517
5518       if (coding->src_multibyte != coding->dst_multibyte)
5519         {
5520           if (coding->src_multibyte)
5521             {
5522               int multibytep = 1;
5523               int consumed_chars;
5524
5525               while (1)
5526                 {
5527                   unsigned char *src_base = src;
5528                   int c;
5529
5530                   ONE_MORE_BYTE (c);
5531                   if (c == '\r')
5532                     {
5533                       if (EQ (eol_type, Qdos))
5534                         {
5535                           if (src == src_end)
5536                             {
5537                               coding->result = CODING_RESULT_INSUFFICIENT_SRC;
5538                               goto no_more_source;
5539                             }
5540                           if (*src == '\n')
5541                             c = *src++;
5542                         }
5543                       else if (EQ (eol_type, Qmac))
5544                         c = '\n';
5545                     }
5546                   if (dst == dst_end)
5547                     {
5548                       coding->consumed = src - coding->source;
5549
5550                     if (EQ (coding->src_object, coding->dst_object))
5551                       dst_end = src;
5552                     if (dst == dst_end)
5553                       {
5554                         dst = alloc_destination (coding, src_end - src + 1,
5555                                                  dst);
5556                         dst_end = coding->destination + coding->dst_bytes;
5557                         coding_set_source (coding);
5558                         src = coding->source + coding->consumed;
5559                         src_end = coding->source + coding->src_bytes;
5560                       }
5561                     }
5562                   *dst++ = c;
5563                   produced_chars++;
5564                 }
5565             no_more_source:
5566               ;
5567             }
5568           else
5569             while (src < src_end)
5570               {
5571                 int multibytep = 1;
5572                 int c = *src++;
5573
5574                 if (c == '\r')
5575                   {
5576                     if (EQ (eol_type, Qdos))
5577                       {
5578                         if (src < src_end
5579                             && *src == '\n')
5580                           c = *src++;
5581                       }
5582                     else if (EQ (eol_type, Qmac))
5583                       c = '\n';
5584                   }
5585                 if (dst >= dst_end - 1)
5586                   {
5587                     coding->consumed = src - coding->source;
5588
5589                     if (EQ (coding->src_object, coding->dst_object))
5590                       dst_end = src;
5591                     if (dst >= dst_end - 1)
5592                       {
5593                         dst = alloc_destination (coding, src_end - src + 2,
5594                                                  dst);
5595                         dst_end = coding->destination + coding->dst_bytes;
5596                         coding_set_source (coding);
5597                         src = coding->source + coding->consumed;
5598                         src_end = coding->source + coding->src_bytes;
5599                       }
5600                   }
5601                 EMIT_ONE_BYTE (c);
5602               }
5603         }
5604       else
5605         {
5606           if (!EQ (coding->src_object, coding->dst_object))
5607             {
5608               int require = coding->src_bytes - coding->dst_bytes;
5609
5610               if (require > 0)
5611                 {
5612                   EMACS_INT offset = src - coding->source;
5613
5614                   dst = alloc_destination (coding, require, dst);
5615                   coding_set_source (coding);
5616                   src = coding->source + offset;
5617                   src_end = coding->source + coding->src_bytes;
5618                 }
5619             }
5620           produced_chars = coding->src_chars;
5621           while (src < src_end)
5622             {
5623               int c = *src++;
5624
5625               if (c == '\r')
5626                 {
5627                   if (EQ (eol_type, Qdos))
5628                     {
5629                       if (src < src_end
5630                           && *src == '\n')
5631                         c = *src++;
5632                       produced_chars--;
5633                     }
5634                   else if (EQ (eol_type, Qmac))
5635                     c = '\n';
5636                 }
5637               *dst++ = c;
5638             }
5639         }
5640       coding->consumed = coding->src_bytes;
5641       coding->consumed_char = coding->src_chars;
5642     }
5643
5644   produced = dst - (coding->destination + coding->produced);
5645   if (BUFFERP (coding->dst_object))
5646     insert_from_gap (produced_chars, produced);
5647   coding->produced += produced;
5648   coding->produced_char += produced_chars;
5649   return produced_chars;
5650 }
5651
5652 /* Compose text in CODING->object according to the annotation data at
5653    CHARBUF.  CHARBUF is an array:
5654      [ -LENGTH ANNOTATION_MASK FROM TO METHOD COMP_LEN [ COMPONENTS... ] ]
5655  */
5656
5657 static INLINE void
5658 produce_composition (coding, charbuf)
5659      struct coding_system *coding;
5660      int *charbuf;
5661 {
5662   int len;
5663   EMACS_INT from, to;
5664   enum composition_method method;
5665   Lisp_Object components;
5666
5667   len = -charbuf[0];
5668   from = coding->dst_pos + charbuf[2];
5669   to = coding->dst_pos + charbuf[3];
5670   method = (enum composition_method) (charbuf[4]);
5671
5672   if (method == COMPOSITION_RELATIVE)
5673     components = Qnil;
5674   else
5675     {
5676       Lisp_Object args[MAX_COMPOSITION_COMPONENTS * 2 - 1];
5677       int i;
5678
5679       len -= 5;
5680       charbuf += 5;
5681       for (i = 0; i < len; i++)
5682         args[i] = make_number (charbuf[i]);
5683       components = (method == COMPOSITION_WITH_ALTCHARS
5684                     ? Fstring (len, args) : Fvector (len, args));
5685     }
5686   compose_text (from, to, components, Qnil, coding->dst_object);
5687 }
5688
5689
5690 /* Put `charset' property on text in CODING->object according to
5691    the annotation data at CHARBUF.  CHARBUF is an array:
5692      [ -LENGTH ANNOTATION_MASK FROM TO CHARSET-ID ]
5693  */
5694
5695 static INLINE void
5696 produce_charset (coding, charbuf)
5697      struct coding_system *coding;
5698      int *charbuf;
5699 {
5700   EMACS_INT from = coding->dst_pos + charbuf[2];
5701   EMACS_INT to = coding->dst_pos + charbuf[3];
5702   struct charset *charset = CHARSET_FROM_ID (charbuf[4]);
5703
5704   Fput_text_property (make_number (from), make_number (to),
5705                       Qcharset, CHARSET_NAME (charset),
5706                       coding->dst_object);
5707 }
5708
5709
5710 #define CHARBUF_SIZE 0x4000
5711
5712 #define ALLOC_CONVERSION_WORK_AREA(coding)                              \
5713   do {                                                                  \
5714     int size = CHARBUF_SIZE;;                                           \
5715                                                                         \
5716     coding->charbuf = NULL;                                             \
5717     while (size > 1024)                                                 \
5718       {                                                                 \
5719         coding->charbuf = (int *) alloca (sizeof (int) * size);         \
5720         if (coding->charbuf)                                            \
5721           break;                                                        \
5722         size >>= 1;                                                     \
5723       }                                                                 \
5724     if (! coding->charbuf)                                              \
5725       {                                                                 \
5726         coding->result = CODING_RESULT_INSUFFICIENT_MEM;                \
5727         return coding->result;                                          \
5728       }                                                                 \
5729     coding->charbuf_size = size;                                        \
5730   } while (0)
5731
5732
5733 static void
5734 produce_annotation (coding)
5735      struct coding_system *coding;
5736 {
5737   int *charbuf = coding->charbuf;
5738   int *charbuf_end = charbuf + coding->charbuf_used;
5739
5740   if (NILP (coding->dst_object))
5741     return;
5742
5743   while (charbuf < charbuf_end)
5744     {
5745       if (*charbuf >= 0)
5746         charbuf++;
5747       else
5748         {
5749           int len = -*charbuf;
5750           switch (charbuf[1])
5751             {
5752             case CODING_ANNOTATE_COMPOSITION_MASK:
5753               produce_composition (coding, charbuf);
5754               break;
5755             case CODING_ANNOTATE_CHARSET_MASK:
5756               produce_charset (coding, charbuf);
5757               break;
5758             default:
5759               abort ();
5760             }
5761           charbuf += len;
5762         }
5763     }
5764 }
5765
5766 /* Decode the data at CODING->src_object into CODING->dst_object.
5767    CODING->src_object is a buffer, a string, or nil.
5768    CODING->dst_object is a buffer.
5769
5770    If CODING->src_object is a buffer, it must be the current buffer.
5771    In this case, if CODING->src_pos is positive, it is a position of
5772    the source text in the buffer, otherwise, the source text is in the
5773    gap area of the buffer, and CODING->src_pos specifies the offset of
5774    the text from GPT (which must be the same as PT).  If this is the
5775    same buffer as CODING->dst_object, CODING->src_pos must be
5776    negative.
5777
5778    If CODING->src_object is a string, CODING->src_pos in an index to
5779    that string.
5780
5781    If CODING->src_object is nil, CODING->source must already point to
5782    the non-relocatable memory area.  In this case, CODING->src_pos is
5783    an offset from CODING->source.
5784
5785    The decoded data is inserted at the current point of the buffer
5786    CODING->dst_object.
5787 */
5788
5789 static int
5790 decode_coding (coding)
5791      struct coding_system *coding;
5792 {
5793   Lisp_Object attrs;
5794
5795   if (BUFFERP (coding->src_object)
5796       && coding->src_pos > 0
5797       && coding->src_pos < GPT
5798       && coding->src_pos + coding->src_chars > GPT)
5799     move_gap_both (coding->src_pos, coding->src_pos_byte);
5800
5801   if (BUFFERP (coding->dst_object))
5802     {
5803       if (current_buffer != XBUFFER (coding->dst_object))
5804         set_buffer_internal (XBUFFER (coding->dst_object));
5805       if (GPT != PT)
5806         move_gap_both (PT, PT_BYTE);
5807     }
5808
5809   coding->consumed = coding->consumed_char = 0;
5810   coding->produced = coding->produced_char = 0;
5811   coding->chars_at_source = 0;
5812   coding->result = CODING_RESULT_SUCCESS;
5813   coding->errors = 0;
5814
5815   ALLOC_CONVERSION_WORK_AREA (coding);
5816
5817   attrs = CODING_ID_ATTRS (coding->id);
5818
5819   do
5820     {
5821       coding_set_source (coding);
5822       coding->annotated = 0;
5823       (*(coding->decoder)) (coding);
5824       if (!NILP (CODING_ATTR_DECODE_TBL (attrs)))
5825         translate_chars (coding, CODING_ATTR_DECODE_TBL (attrs));
5826       else if (!NILP (Vstandard_translation_table_for_decode))
5827         translate_chars (coding, Vstandard_translation_table_for_decode);
5828       coding_set_destination (coding);
5829       produce_chars (coding);
5830       if (coding->annotated)
5831         produce_annotation (coding);
5832     }
5833   while (coding->consumed < coding->src_bytes
5834          && ! coding->result);
5835
5836   if (EQ (CODING_ATTR_TYPE (CODING_ID_ATTRS (coding->id)), Qccl)
5837       && SYMBOLP (CODING_ID_EOL_TYPE (coding->id))
5838       && ! EQ (CODING_ID_EOL_TYPE (coding->id), Qunix))
5839     decode_eol (coding);
5840
5841   coding->carryover_bytes = 0;
5842   if (coding->consumed < coding->src_bytes)
5843     {
5844       int nbytes = coding->src_bytes - coding->consumed;
5845       unsigned char *src;
5846
5847       coding_set_source (coding);
5848       coding_set_destination (coding);
5849       src = coding->source + coding->consumed;
5850
5851       if (coding->mode & CODING_MODE_LAST_BLOCK)
5852         {
5853           /* Flush out unprocessed data as binary chars.  We are sure
5854              that the number of data is less than the size of
5855              coding->charbuf.  */
5856           while (nbytes-- > 0)
5857             {
5858               int c = *src++;
5859
5860               coding->charbuf[coding->charbuf_used++] = (c & 0x80 ? - c : c);
5861             }
5862           produce_chars (coding);
5863         }
5864       else
5865         {
5866           /* Record unprocessed bytes in coding->carryover.  We are
5867              sure that the number of data is less than the size of
5868              coding->carryover.  */
5869           unsigned char *p = coding->carryover;
5870
5871           coding->carryover_bytes = nbytes;
5872           while (nbytes-- > 0)
5873             *p++ = *src++;
5874         }
5875       coding->consumed = coding->src_bytes;
5876     }
5877
5878   return coding->result;
5879 }
5880
5881
5882 /* Extract an annotation datum from a composition starting at POS and
5883    ending before LIMIT of CODING->src_object (buffer or string), store
5884    the data in BUF, set *STOP to a starting position of the next
5885    composition (if any) or to LIMIT, and return the address of the
5886    next element of BUF.
5887
5888    If such an annotation is not found, set *STOP to a starting
5889    position of a composition after POS (if any) or to LIMIT, and
5890    return BUF.  */
5891
5892 static INLINE int *
5893 handle_composition_annotation (pos, limit, coding, buf, stop)
5894      EMACS_INT pos, limit;
5895      struct coding_system *coding;
5896      int *buf;
5897      EMACS_INT *stop;
5898 {
5899   EMACS_INT start, end;
5900   Lisp_Object prop;
5901
5902   if (! find_composition (pos, limit, &start, &end, &prop, coding->src_object)
5903       || end > limit)
5904     *stop = limit;
5905   else if (start > pos)
5906     *stop = start;
5907   else
5908     {
5909       if (start == pos)
5910         {
5911           /* We found a composition.  Store the corresponding
5912              annotation data in BUF.  */
5913           int *head = buf;
5914           enum composition_method method = COMPOSITION_METHOD (prop);
5915           int nchars = COMPOSITION_LENGTH (prop);
5916
5917           ADD_COMPOSITION_DATA (buf, 0, nchars, method);
5918           if (method != COMPOSITION_RELATIVE)
5919             {
5920               Lisp_Object components;
5921               int len, i, i_byte;
5922
5923               components = COMPOSITION_COMPONENTS (prop);
5924               if (VECTORP (components))
5925                 {
5926                   len = XVECTOR (components)->size;
5927                   for (i = 0; i < len; i++)
5928                     *buf++ = XINT (AREF (components, i));
5929                 }
5930               else if (STRINGP (components))
5931                 {
5932                   len = XSTRING (components)->size;
5933                   i = i_byte = 0;
5934                   while (i < len)
5935                     {
5936                       FETCH_STRING_CHAR_ADVANCE (*buf, components, i, i_byte);
5937                       buf++;
5938                     }
5939                 }
5940               else if (INTEGERP (components))
5941                 {
5942                   len = 1;
5943                   *buf++ = XINT (components);
5944                 }
5945               else if (CONSP (components))
5946                 {
5947                   for (len = 0; CONSP (components);
5948                        len++, components = XCDR (components))
5949                     *buf++ = XINT (XCAR (components));
5950                 }
5951               else
5952                 abort ();
5953               *head -= len;
5954             }
5955         }
5956
5957       if (find_composition (end, limit, &start, &end, &prop,
5958                             coding->src_object)
5959           && end <= limit)
5960         *stop = start;
5961       else
5962         *stop = limit;
5963     }
5964   return buf;
5965 }
5966
5967
5968 /* Extract an annotation datum from a text property `charset' at POS of
5969    CODING->src_object (buffer of string), store the data in BUF, set
5970    *STOP to the position where the value of `charset' property changes
5971    (limiting by LIMIT), and return the address of the next element of
5972    BUF.
5973
5974    If the property value is nil, set *STOP to the position where the
5975    property value is non-nil (limiting by LIMIT), and return BUF.  */
5976
5977 static INLINE int *
5978 handle_charset_annotation (pos, limit, coding, buf, stop)
5979      EMACS_INT pos, limit;
5980      struct coding_system *coding;
5981      int *buf;
5982      EMACS_INT *stop;
5983 {
5984   Lisp_Object val, next;
5985   int id;
5986
5987   val = Fget_text_property (make_number (pos), Qcharset, coding->src_object);
5988   if (! NILP (val) && CHARSETP (val))
5989     id = XINT (CHARSET_SYMBOL_ID (val));
5990   else
5991     id = -1;
5992   ADD_CHARSET_DATA (buf, 0, 0, id);
5993   next = Fnext_single_property_change (make_number (pos), Qcharset,
5994                                        coding->src_object,
5995                                        make_number (limit));
5996   *stop = XINT (next);
5997   return buf;
5998 }
5999
6000
6001 static void
6002 consume_chars (coding)
6003      struct coding_system *coding;
6004 {
6005   int *buf = coding->charbuf;
6006   int *buf_end = coding->charbuf + coding->charbuf_size;
6007   const unsigned char *src = coding->source + coding->consumed;
6008   const unsigned char *src_end = coding->source + coding->src_bytes;
6009   EMACS_INT pos = coding->src_pos + coding->consumed_char;
6010   EMACS_INT end_pos = coding->src_pos + coding->src_chars;
6011   int multibytep = coding->src_multibyte;
6012   Lisp_Object eol_type;
6013   int c;
6014   EMACS_INT stop, stop_composition, stop_charset;
6015
6016   eol_type = CODING_ID_EOL_TYPE (coding->id);
6017   if (VECTORP (eol_type))
6018     eol_type = Qunix;
6019
6020   /* Note: composition handling is not yet implemented.  */
6021   coding->common_flags &= ~CODING_ANNOTATE_COMPOSITION_MASK;
6022
6023   if (coding->common_flags & CODING_ANNOTATE_COMPOSITION_MASK)
6024     stop = stop_composition = pos;
6025   else
6026     stop = stop_composition = end_pos;
6027   if (coding->common_flags & CODING_ANNOTATE_CHARSET_MASK)
6028     stop = stop_charset = pos;
6029   else
6030     stop_charset = end_pos;
6031
6032   /* Compensate for CRLF and annotation.  */
6033   buf_end -= 1 + MAX_ANNOTATION_LENGTH;
6034   while (buf < buf_end)
6035     {
6036       if (pos == stop)
6037         {
6038           if (pos == end_pos)
6039             break;
6040           if (pos == stop_composition)
6041             buf = handle_composition_annotation (pos, end_pos, coding,
6042                                                  buf, &stop_composition);
6043           if (pos == stop_charset)
6044             buf = handle_charset_annotation (pos, end_pos, coding,
6045                                              buf, &stop_charset);
6046           stop = (stop_composition < stop_charset
6047                   ? stop_composition : stop_charset);
6048         }
6049
6050       if (! multibytep)
6051         {
6052           EMACS_INT bytes;
6053
6054           if (! CODING_FOR_UNIBYTE (coding)
6055               && (bytes = MULTIBYTE_LENGTH (src, src_end)) > 0)
6056             c = STRING_CHAR_ADVANCE (src), pos += bytes;
6057           else
6058             c = *src++, pos++;
6059         }
6060       else
6061         c = STRING_CHAR_ADVANCE (src), pos++;
6062       if ((c == '\r') && (coding->mode & CODING_MODE_SELECTIVE_DISPLAY))
6063         c = '\n';
6064       if (! EQ (eol_type, Qunix))
6065         {
6066           if (c == '\n')
6067             {
6068               if (EQ (eol_type, Qdos))
6069                 *buf++ = '\r';
6070               else
6071                 c = '\r';
6072             }
6073         }
6074       *buf++ = c;
6075     }
6076
6077   coding->consumed = src - coding->source;
6078   coding->consumed_char = pos - coding->src_pos;
6079   coding->charbuf_used = buf - coding->charbuf;
6080   coding->chars_at_source = 0;
6081 }
6082
6083
6084 /* Encode the text at CODING->src_object into CODING->dst_object.
6085    CODING->src_object is a buffer or a string.
6086    CODING->dst_object is a buffer or nil.
6087
6088    If CODING->src_object is a buffer, it must be the current buffer.
6089    In this case, if CODING->src_pos is positive, it is a position of
6090    the source text in the buffer, otherwise. the source text is in the
6091    gap area of the buffer, and coding->src_pos specifies the offset of
6092    the text from GPT (which must be the same as PT).  If this is the
6093    same buffer as CODING->dst_object, CODING->src_pos must be
6094    negative and CODING should not have `pre-write-conversion'.
6095
6096    If CODING->src_object is a string, CODING should not have
6097    `pre-write-conversion'.
6098
6099    If CODING->dst_object is a buffer, the encoded data is inserted at
6100    the current point of that buffer.
6101
6102    If CODING->dst_object is nil, the encoded data is placed at the
6103    memory area specified by CODING->destination.  */
6104
6105 static int
6106 encode_coding (coding)
6107      struct coding_system *coding;
6108 {
6109   Lisp_Object attrs;
6110
6111   attrs = CODING_ID_ATTRS (coding->id);
6112
6113   if (BUFFERP (coding->dst_object))
6114     {
6115       set_buffer_internal (XBUFFER (coding->dst_object));
6116       coding->dst_multibyte
6117         = ! NILP (current_buffer->enable_multibyte_characters);
6118     }
6119
6120   coding->consumed = coding->consumed_char = 0;
6121   coding->produced = coding->produced_char = 0;
6122   coding->result = CODING_RESULT_SUCCESS;
6123   coding->errors = 0;
6124
6125   ALLOC_CONVERSION_WORK_AREA (coding);
6126
6127   do {
6128     coding_set_source (coding);
6129     consume_chars (coding);
6130
6131     if (!NILP (CODING_ATTR_ENCODE_TBL (attrs)))
6132       translate_chars (coding, CODING_ATTR_ENCODE_TBL (attrs));
6133     else if (!NILP (Vstandard_translation_table_for_encode))
6134       translate_chars (coding, Vstandard_translation_table_for_encode);
6135
6136     coding_set_destination (coding);
6137     (*(coding->encoder)) (coding);
6138   } while (coding->consumed_char < coding->src_chars);
6139
6140   if (BUFFERP (coding->dst_object))
6141     insert_from_gap (coding->produced_char, coding->produced);
6142
6143   return (coding->result);
6144 }
6145
6146
6147 /* Stack of working buffers used in code conversion.  An nil element
6148    means that the code conversion of that level is not using a working
6149    buffer.  */
6150 Lisp_Object Vcode_conversion_work_buf_list;
6151
6152 /* A working buffer used by the top level conversion.  */
6153 Lisp_Object Vcode_conversion_reused_work_buf;
6154
6155
6156 /* Return a working buffer that can be freely used by the following
6157    code conversion.  MULTIBYTEP specifies the multibyteness of the
6158    buffer.  */
6159
6160 Lisp_Object
6161 make_conversion_work_buffer (multibytep, depth)
6162      int multibytep, depth;
6163 {
6164   struct buffer *current = current_buffer;
6165   Lisp_Object buf, name;
6166
6167   if (depth == 0)
6168     {
6169       if (NILP (Vcode_conversion_reused_work_buf))
6170         Vcode_conversion_reused_work_buf
6171           = Fget_buffer_create (build_string (" *code-converting-work<0>*"));
6172       buf = Vcode_conversion_reused_work_buf;
6173     }
6174   else
6175     {
6176       if (depth < 0)
6177         {
6178           name = build_string (" *code-converting-work*");
6179           name = Fgenerate_new_buffer_name (name, Qnil);
6180         }
6181       else
6182         {
6183           char str[128];
6184
6185           sprintf (str, " *code-converting-work*<%d>", depth);
6186           name = build_string (str);
6187         }
6188       buf = Fget_buffer_create (name);
6189     }
6190   set_buffer_internal (XBUFFER (buf));
6191   current_buffer->undo_list = Qt;
6192   Ferase_buffer ();
6193   Fset_buffer_multibyte (multibytep ? Qt : Qnil, Qnil);
6194   set_buffer_internal (current);
6195   return buf;
6196 }
6197
6198 static Lisp_Object
6199 code_conversion_restore (buffer)
6200      Lisp_Object buffer;
6201 {
6202   Lisp_Object workbuf;
6203
6204   workbuf = XCAR (Vcode_conversion_work_buf_list);
6205   if (! NILP (workbuf)
6206       && ! EQ (workbuf, Vcode_conversion_reused_work_buf)
6207       && ! NILP (Fbuffer_live_p (workbuf)))
6208     Fkill_buffer (workbuf);
6209   Vcode_conversion_work_buf_list = XCDR (Vcode_conversion_work_buf_list);
6210   set_buffer_internal (XBUFFER (buffer));
6211   return Qnil;
6212 }
6213
6214 static Lisp_Object
6215 code_conversion_save (buffer, with_work_buf, multibyte)
6216      Lisp_Object buffer;
6217      int with_work_buf, multibyte;
6218 {
6219   Lisp_Object workbuf;
6220
6221   if (with_work_buf)
6222     {
6223       int depth = XINT (Flength (Vcode_conversion_work_buf_list));
6224
6225       workbuf = make_conversion_work_buffer (multibyte, depth);
6226     }
6227   else
6228     workbuf = Qnil;
6229   Vcode_conversion_work_buf_list
6230     = Fcons (workbuf, Vcode_conversion_work_buf_list);
6231   record_unwind_protect (code_conversion_restore, buffer);
6232   return workbuf;
6233 }
6234
6235 int
6236 decode_coding_gap (coding, chars, bytes)
6237      struct coding_system *coding;
6238      EMACS_INT chars, bytes;
6239 {
6240   int count = specpdl_ptr - specpdl;
6241   Lisp_Object attrs;
6242   Lisp_Object buffer;
6243
6244   buffer = Fcurrent_buffer ();
6245   code_conversion_save (buffer, 0, 0);
6246
6247   coding->src_object = buffer;
6248   coding->src_chars = chars;
6249   coding->src_bytes = bytes;
6250   coding->src_pos = -chars;
6251   coding->src_pos_byte = -bytes;
6252   coding->src_multibyte = chars < bytes;
6253   coding->dst_object = buffer;
6254   coding->dst_pos = PT;
6255   coding->dst_pos_byte = PT_BYTE;
6256   coding->dst_multibyte = ! NILP (current_buffer->enable_multibyte_characters);
6257   coding->mode |= CODING_MODE_LAST_BLOCK;
6258
6259   if (CODING_REQUIRE_DETECTION (coding))
6260     detect_coding (coding);
6261
6262   decode_coding (coding);
6263
6264   attrs = CODING_ID_ATTRS (coding->id);
6265   if (! NILP (CODING_ATTR_POST_READ (attrs)))
6266     {
6267       struct gcpro gcpro1;
6268       EMACS_INT prev_Z = Z, prev_Z_BYTE = Z_BYTE;
6269       Lisp_Object val;
6270
6271       TEMP_SET_PT_BOTH (coding->dst_pos, coding->dst_pos_byte);
6272       GCPRO1 (buffer);
6273       val = call1 (CODING_ATTR_POST_READ (attrs),
6274                    make_number (coding->produced_char));
6275       UNGCPRO;
6276       CHECK_NATNUM (val);
6277       coding->produced_char += Z - prev_Z;
6278       coding->produced += Z_BYTE - prev_Z_BYTE;
6279     }
6280
6281   unbind_to (count, Qnil);
6282   return coding->result;
6283 }
6284
6285 int
6286 encode_coding_gap (coding, chars, bytes)
6287      struct coding_system *coding;
6288      EMACS_INT chars, bytes;
6289 {
6290   int count = specpdl_ptr - specpdl;
6291   Lisp_Object buffer;
6292
6293   buffer = Fcurrent_buffer ();
6294   code_conversion_save (buffer, 0, 0);
6295
6296   coding->src_object = buffer;
6297   coding->src_chars = chars;
6298   coding->src_bytes = bytes;
6299   coding->src_pos = -chars;
6300   coding->src_pos_byte = -bytes;
6301   coding->src_multibyte = chars < bytes;
6302   coding->dst_object = coding->src_object;
6303   coding->dst_pos = PT;
6304   coding->dst_pos_byte = PT_BYTE;
6305
6306   encode_coding (coding);
6307
6308   unbind_to (count, Qnil);
6309   return coding->result;
6310 }
6311
6312
6313 /* Decode the text in the range FROM/FROM_BYTE and TO/TO_BYTE in
6314    SRC_OBJECT into DST_OBJECT by coding context CODING.
6315
6316    SRC_OBJECT is a buffer, a string, or Qnil.
6317
6318    If it is a buffer, the text is at point of the buffer.  FROM and TO
6319    are positions in the buffer.
6320
6321    If it is a string, the text is at the beginning of the string.
6322    FROM and TO are indices to the string.
6323
6324    If it is nil, the text is at coding->source.  FROM and TO are
6325    indices to coding->source.
6326
6327    DST_OBJECT is a buffer, Qt, or Qnil.
6328
6329    If it is a buffer, the decoded text is inserted at point of the
6330    buffer.  If the buffer is the same as SRC_OBJECT, the source text
6331    is deleted.
6332
6333    If it is Qt, a string is made from the decoded text, and
6334    set in CODING->dst_object.
6335
6336    If it is Qnil, the decoded text is stored at CODING->destination.
6337    The caller must allocate CODING->dst_bytes bytes at
6338    CODING->destination by xmalloc.  If the decoded text is longer than
6339    CODING->dst_bytes, CODING->destination is relocated by xrealloc.
6340  */
6341
6342 void
6343 decode_coding_object (coding, src_object, from, from_byte, to, to_byte,
6344                       dst_object)
6345      struct coding_system *coding;
6346      Lisp_Object src_object;
6347      EMACS_INT from, from_byte, to, to_byte;
6348      Lisp_Object dst_object;
6349 {
6350   int count = specpdl_ptr - specpdl;
6351   unsigned char *destination;
6352   EMACS_INT dst_bytes;
6353   EMACS_INT chars = to - from;
6354   EMACS_INT bytes = to_byte - from_byte;
6355   Lisp_Object attrs;
6356   Lisp_Object buffer;
6357   int saved_pt = -1, saved_pt_byte;
6358
6359   buffer = Fcurrent_buffer ();
6360
6361   if (NILP (dst_object))
6362     {
6363       destination = coding->destination;
6364       dst_bytes = coding->dst_bytes;
6365     }
6366
6367   coding->src_object = src_object;
6368   coding->src_chars = chars;
6369   coding->src_bytes = bytes;
6370   coding->src_multibyte = chars < bytes;
6371
6372   if (STRINGP (src_object))
6373     {
6374       coding->src_pos = from;
6375       coding->src_pos_byte = from_byte;
6376     }
6377   else if (BUFFERP (src_object))
6378     {
6379       set_buffer_internal (XBUFFER (src_object));
6380       if (from != GPT)
6381         move_gap_both (from, from_byte);
6382       if (EQ (src_object, dst_object))
6383         {
6384           saved_pt = PT, saved_pt_byte = PT_BYTE;
6385           TEMP_SET_PT_BOTH (from, from_byte);
6386           del_range_both (from, from_byte, to, to_byte, 1);
6387           coding->src_pos = -chars;
6388           coding->src_pos_byte = -bytes;
6389         }
6390       else
6391         {
6392           coding->src_pos = from;
6393           coding->src_pos_byte = from_byte;
6394         }
6395     }
6396
6397   if (CODING_REQUIRE_DETECTION (coding))
6398     detect_coding (coding);
6399   attrs = CODING_ID_ATTRS (coding->id);
6400
6401   if (EQ (dst_object, Qt)
6402       || (! NILP (CODING_ATTR_POST_READ (attrs))
6403           && NILP (dst_object)))
6404     {
6405       coding->dst_object = code_conversion_save (buffer, 1, 1);
6406       coding->dst_pos = BEG;
6407       coding->dst_pos_byte = BEG_BYTE;
6408       coding->dst_multibyte = 1;
6409     }
6410   else if (BUFFERP (dst_object))
6411     {
6412       code_conversion_save (buffer, 0, 0);
6413       coding->dst_object = dst_object;
6414       coding->dst_pos = BUF_PT (XBUFFER (dst_object));
6415       coding->dst_pos_byte = BUF_PT_BYTE (XBUFFER (dst_object));
6416       coding->dst_multibyte
6417         = ! NILP (XBUFFER (dst_object)->enable_multibyte_characters);
6418     }
6419   else
6420     {
6421       code_conversion_save (buffer, 0, 0);
6422       coding->dst_object = Qnil;
6423       coding->dst_multibyte = 1;
6424     }
6425
6426   decode_coding (coding);
6427
6428   if (BUFFERP (coding->dst_object))
6429     set_buffer_internal (XBUFFER (coding->dst_object));
6430
6431   if (! NILP (CODING_ATTR_POST_READ (attrs)))
6432     {
6433       struct gcpro gcpro1, gcpro2;
6434       EMACS_INT prev_Z = Z, prev_Z_BYTE = Z_BYTE;
6435       Lisp_Object val;
6436
6437       TEMP_SET_PT_BOTH (coding->dst_pos, coding->dst_pos_byte);
6438       GCPRO2 (coding->src_object, coding->dst_object);
6439       val = call1 (CODING_ATTR_POST_READ (attrs),
6440                    make_number (coding->produced_char));
6441       UNGCPRO;
6442       CHECK_NATNUM (val);
6443       coding->produced_char += Z - prev_Z;
6444       coding->produced += Z_BYTE - prev_Z_BYTE;
6445     }
6446
6447   if (EQ (dst_object, Qt))
6448     {
6449       coding->dst_object = Fbuffer_string ();
6450     }
6451   else if (NILP (dst_object) && BUFFERP (coding->dst_object))
6452     {
6453       set_buffer_internal (XBUFFER (coding->dst_object));
6454       if (dst_bytes < coding->produced)
6455         {
6456           destination
6457             = (unsigned char *) xrealloc (destination, coding->produced);
6458           if (! destination)
6459             {
6460               coding->result = CODING_RESULT_INSUFFICIENT_DST;
6461               unbind_to (count, Qnil);
6462               return;
6463             }
6464           if (BEGV < GPT && GPT < BEGV + coding->produced_char)
6465             move_gap_both (BEGV, BEGV_BYTE);
6466           bcopy (BEGV_ADDR, destination, coding->produced);
6467           coding->destination = destination;
6468         }
6469     }
6470
6471   if (saved_pt >= 0)
6472     {
6473       /* This is the case of:
6474          (BUFFERP (src_object) && EQ (src_object, dst_object))
6475          As we have moved PT while replacing the original buffer
6476          contents, we must recover it now.  */
6477       set_buffer_internal (XBUFFER (src_object));
6478       if (saved_pt < from)
6479         TEMP_SET_PT_BOTH (saved_pt, saved_pt_byte);
6480       else if (saved_pt < from + chars)
6481         TEMP_SET_PT_BOTH (from, from_byte);
6482       else if (! NILP (current_buffer->enable_multibyte_characters))
6483         TEMP_SET_PT_BOTH (saved_pt + (coding->produced_char - chars),
6484                           saved_pt_byte + (coding->produced - bytes));
6485       else
6486         TEMP_SET_PT_BOTH (saved_pt + (coding->produced - bytes),
6487                           saved_pt_byte + (coding->produced - bytes));
6488     }
6489
6490   unbind_to (count, Qnil);
6491 }
6492
6493
6494 void
6495 encode_coding_object (coding, src_object, from, from_byte, to, to_byte,
6496                       dst_object)
6497      struct coding_system *coding;
6498      Lisp_Object src_object;
6499      EMACS_INT from, from_byte, to, to_byte;
6500      Lisp_Object dst_object;
6501 {
6502   int count = specpdl_ptr - specpdl;
6503   EMACS_INT chars = to - from;
6504   EMACS_INT bytes = to_byte - from_byte;
6505   Lisp_Object attrs;
6506   Lisp_Object buffer;
6507   int saved_pt = -1, saved_pt_byte;
6508
6509   buffer = Fcurrent_buffer ();
6510
6511   coding->src_object = src_object;
6512   coding->src_chars = chars;
6513   coding->src_bytes = bytes;
6514   coding->src_multibyte = chars < bytes;
6515
6516   attrs = CODING_ID_ATTRS (coding->id);
6517
6518   if (! NILP (CODING_ATTR_PRE_WRITE (attrs)))
6519     {
6520       coding->src_object = code_conversion_save (buffer, 1,
6521                                                  coding->src_multibyte);
6522       set_buffer_internal (XBUFFER (coding->src_object));
6523       if (STRINGP (src_object))
6524         insert_from_string (src_object, from, from_byte, chars, bytes, 0);
6525       else if (BUFFERP (src_object))
6526         insert_from_buffer (XBUFFER (src_object), from, chars, 0);
6527       else
6528         insert_1_both (coding->source + from, chars, bytes, 0, 0, 0);
6529
6530       if (EQ (src_object, dst_object))
6531         {
6532           set_buffer_internal (XBUFFER (src_object));
6533           saved_pt = PT, saved_pt_byte = PT_BYTE;
6534           del_range_both (from, from_byte, to, to_byte, 1);
6535           set_buffer_internal (XBUFFER (coding->src_object));
6536         }
6537
6538       call2 (CODING_ATTR_PRE_WRITE (attrs),
6539              make_number (BEG), make_number (Z));
6540       coding->src_object = Fcurrent_buffer ();
6541       if (BEG != GPT)
6542         move_gap_both (BEG, BEG_BYTE);
6543       coding->src_chars = Z - BEG;
6544       coding->src_bytes = Z_BYTE - BEG_BYTE;
6545       coding->src_pos = BEG;
6546       coding->src_pos_byte = BEG_BYTE;
6547       coding->src_multibyte = Z < Z_BYTE;
6548     }
6549   else if (STRINGP (src_object))
6550     {
6551       code_conversion_save (buffer, 0, 0);
6552       coding->src_pos = from;
6553       coding->src_pos_byte = from_byte;
6554     }
6555   else if (BUFFERP (src_object))
6556     {
6557       code_conversion_save (buffer, 0, 0);
6558       set_buffer_internal (XBUFFER (src_object));
6559       if (EQ (src_object, dst_object))
6560         {
6561           saved_pt = PT, saved_pt_byte = PT_BYTE;
6562           coding->src_object = del_range_1 (from, to, 1, 1);
6563           coding->src_pos = 0;
6564           coding->src_pos_byte = 0;
6565         }
6566       else
6567         {
6568           if (from < GPT && to >= GPT)
6569             move_gap_both (from, from_byte);
6570           coding->src_pos = from;
6571           coding->src_pos_byte = from_byte;
6572         }
6573     }
6574   else
6575     code_conversion_save (buffer, 0, 0);
6576
6577   if (BUFFERP (dst_object))
6578     {
6579       coding->dst_object = dst_object;
6580       if (EQ (src_object, dst_object))
6581         {
6582           coding->dst_pos = from;
6583           coding->dst_pos_byte = from_byte;
6584         }
6585       else
6586         {
6587           coding->dst_pos = BUF_PT (XBUFFER (dst_object));
6588           coding->dst_pos_byte = BUF_PT_BYTE (XBUFFER (dst_object));
6589         }
6590       coding->dst_multibyte
6591         = ! NILP (XBUFFER (dst_object)->enable_multibyte_characters);
6592     }
6593   else if (EQ (dst_object, Qt))
6594     {
6595       coding->dst_object = Qnil;
6596       coding->dst_bytes = coding->src_chars;
6597       if (coding->dst_bytes == 0)
6598         coding->dst_bytes = 1;
6599       coding->destination = (unsigned char *) xmalloc (coding->dst_bytes);
6600       coding->dst_multibyte = 0;
6601     }
6602   else
6603     {
6604       coding->dst_object = Qnil;
6605       coding->dst_multibyte = 0;
6606     }
6607
6608   encode_coding (coding);
6609
6610   if (EQ (dst_object, Qt))
6611     {
6612       if (BUFFERP (coding->dst_object))
6613         coding->dst_object = Fbuffer_string ();
6614       else
6615         {
6616           coding->dst_object
6617             = make_unibyte_string ((char *) coding->destination,
6618                                    coding->produced);
6619           xfree (coding->destination);
6620         }
6621     }
6622
6623   if (saved_pt >= 0)
6624     {
6625       /* This is the case of:
6626          (BUFFERP (src_object) && EQ (src_object, dst_object))
6627          As we have moved PT while replacing the original buffer
6628          contents, we must recover it now.  */
6629       set_buffer_internal (XBUFFER (src_object));
6630       if (saved_pt < from)
6631         TEMP_SET_PT_BOTH (saved_pt, saved_pt_byte);
6632       else if (saved_pt < from + chars)
6633         TEMP_SET_PT_BOTH (from, from_byte);
6634       else if (! NILP (current_buffer->enable_multibyte_characters))
6635         TEMP_SET_PT_BOTH (saved_pt + (coding->produced_char - chars),
6636                           saved_pt_byte + (coding->produced - bytes));
6637       else
6638         TEMP_SET_PT_BOTH (saved_pt + (coding->produced - bytes),
6639                           saved_pt_byte + (coding->produced - bytes));
6640     }
6641
6642   unbind_to (count, Qnil);
6643 }
6644
6645
6646 Lisp_Object
6647 preferred_coding_system ()
6648 {
6649   int id = coding_categories[coding_priorities[0]].id;
6650
6651   return CODING_ID_NAME (id);
6652 }
6653
6654 \f
6655 #ifdef emacs
6656 /*** 8. Emacs Lisp library functions ***/
6657
6658 DEFUN ("coding-system-p", Fcoding_system_p, Scoding_system_p, 1, 1, 0,
6659        doc: /* Return t if OBJECT is nil or a coding-system.
6660 See the documentation of `define-coding-system' for information
6661 about coding-system objects.  */)
6662      (obj)
6663      Lisp_Object obj;
6664 {
6665   return ((NILP (obj) || CODING_SYSTEM_P (obj)) ? Qt : Qnil);
6666 }
6667
6668 DEFUN ("read-non-nil-coding-system", Fread_non_nil_coding_system,
6669        Sread_non_nil_coding_system, 1, 1, 0,
6670        doc: /* Read a coding system from the minibuffer, prompting with string PROMPT.  */)
6671      (prompt)
6672      Lisp_Object prompt;
6673 {
6674   Lisp_Object val;
6675   do
6676     {
6677       val = Fcompleting_read (prompt, Vcoding_system_alist, Qnil,
6678                               Qt, Qnil, Qcoding_system_history, Qnil, Qnil);
6679     }
6680   while (XSTRING (val)->size == 0);
6681   return (Fintern (val, Qnil));
6682 }
6683
6684 DEFUN ("read-coding-system", Fread_coding_system, Sread_coding_system, 1, 2, 0,
6685        doc: /* Read a coding system from the minibuffer, prompting with string PROMPT.
6686 If the user enters null input, return second argument DEFAULT-CODING-SYSTEM.  */)
6687      (prompt, default_coding_system)
6688      Lisp_Object prompt, default_coding_system;
6689 {
6690   Lisp_Object val;
6691   if (SYMBOLP (default_coding_system))
6692     XSETSTRING (default_coding_system, XSYMBOL (default_coding_system)->name);
6693   val = Fcompleting_read (prompt, Vcoding_system_alist, Qnil,
6694                           Qt, Qnil, Qcoding_system_history,
6695                           default_coding_system, Qnil);
6696   return (XSTRING (val)->size == 0 ? Qnil : Fintern (val, Qnil));
6697 }
6698
6699 DEFUN ("check-coding-system", Fcheck_coding_system, Scheck_coding_system,
6700        1, 1, 0,
6701        doc: /* Check validity of CODING-SYSTEM.
6702 If valid, return CODING-SYSTEM, else signal a `coding-system-error' error.  */)
6703   (coding_system)
6704      Lisp_Object coding_system;
6705 {
6706   CHECK_SYMBOL (coding_system);
6707   if (!NILP (Fcoding_system_p (coding_system)))
6708     return coding_system;
6709   while (1)
6710     Fsignal (Qcoding_system_error, Fcons (coding_system, Qnil));
6711 }
6712
6713 \f
6714 /* Detect how the bytes at SRC of length SRC_BYTES are encoded.  If
6715    HIGHEST is nonzero, return the coding system of the highest
6716    priority among the detected coding systems.  Otherwize return a
6717    list of detected coding systems sorted by their priorities.  If
6718    MULTIBYTEP is nonzero, it is assumed that the bytes are in correct
6719    multibyte form but contains only ASCII and eight-bit chars.
6720    Otherwise, the bytes are raw bytes.
6721
6722    CODING-SYSTEM controls the detection as below:
6723
6724    If it is nil, detect both text-format and eol-format.  If the
6725    text-format part of CODING-SYSTEM is already specified
6726    (e.g. `iso-latin-1'), detect only eol-format.  If the eol-format
6727    part of CODING-SYSTEM is already specified (e.g. `undecided-unix'),
6728    detect only text-format.  */
6729
6730 Lisp_Object
6731 detect_coding_system (src, src_bytes, highest, multibytep, coding_system)
6732      unsigned char *src;
6733      int src_bytes, highest;
6734      int multibytep;
6735      Lisp_Object coding_system;
6736 {
6737   unsigned char *src_end = src + src_bytes;
6738   Lisp_Object attrs, eol_type;
6739   Lisp_Object val;
6740   struct coding_system coding;
6741   int id;
6742   struct coding_detection_info detect_info;
6743
6744   if (NILP (coding_system))
6745     coding_system = Qundecided;
6746   setup_coding_system (coding_system, &coding);
6747   attrs = CODING_ID_ATTRS (coding.id);
6748   eol_type = CODING_ID_EOL_TYPE (coding.id);
6749   coding_system = CODING_ATTR_BASE_NAME (attrs);
6750
6751   coding.source = src;
6752   coding.src_bytes = src_bytes;
6753   coding.src_multibyte = multibytep;
6754   coding.consumed = 0;
6755   coding.mode |= CODING_MODE_LAST_BLOCK;
6756
6757   detect_info.checked = detect_info.found = detect_info.rejected = 0;
6758
6759   /* At first, detect text-format if necessary.  */
6760   if (XINT (CODING_ATTR_CATEGORY (attrs)) == coding_category_undecided)
6761     {
6762       enum coding_category category;
6763       struct coding_system *this;
6764       int c, i;
6765
6766       for (; src < src_end; src++)
6767         {
6768           c = *src;
6769           if (c & 0x80
6770               || (c < 0x20 && (c == ISO_CODE_ESC
6771                                || c == ISO_CODE_SI
6772                                || c == ISO_CODE_SO)))
6773             break;
6774         }
6775       coding.head_ascii = src - coding.source;
6776
6777       if (src < src_end)
6778         for (i = 0; i < coding_category_raw_text; i++)
6779           {
6780             category = coding_priorities[i];
6781             this = coding_categories + category;
6782
6783             if (this->id < 0)
6784               {
6785                 /* No coding system of this category is defined.  */
6786                 detect_info.rejected |= (1 << category);
6787               }
6788             else if (category >= coding_category_raw_text)
6789               continue;
6790             else if (detect_info.checked & (1 << category))
6791               {
6792                 if (highest
6793                     && (detect_info.found & (1 << category)))
6794                   break;
6795               }
6796             else
6797               {
6798                 if ((*(this->detector)) (&coding, &detect_info)
6799                     && highest
6800                     && (detect_info.found & (1 << category)))
6801                   break;
6802               }
6803           }
6804
6805
6806       if (detect_info.rejected == CATEGORY_MASK_ANY)
6807         {
6808           detect_info.found = CATEGORY_MASK_RAW_TEXT;
6809           id = coding_categories[coding_category_raw_text].id;
6810           val = Fcons (make_number (id), Qnil);
6811         }
6812       else if (! detect_info.rejected && ! detect_info.found)
6813         {
6814           detect_info.found = CATEGORY_MASK_ANY;
6815           id = coding_categories[coding_category_undecided].id;
6816           val = Fcons (make_number (id), Qnil);
6817         }
6818       else if (highest)
6819         {
6820           if (detect_info.found)
6821             {
6822               detect_info.found = 1 << category;
6823               val = Fcons (make_number (this->id), Qnil);
6824             }
6825           else
6826             for (i = 0; i < coding_category_raw_text; i++)
6827               if (! (detect_info.rejected & (1 << coding_priorities[i])))
6828                 {
6829                   detect_info.found = 1 << coding_priorities[i];
6830                   id = coding_categories[coding_priorities[i]].id;
6831                   val = Fcons (make_number (id), Qnil);
6832                   break;
6833                 }
6834         }
6835       else
6836         {
6837           int mask = detect_info.rejected | detect_info.found;
6838           int found = 0;
6839           val = Qnil;
6840
6841           for (i = coding_category_raw_text - 1; i >= 0; i--)
6842             {
6843               category = coding_priorities[i];
6844               if (! (mask & (1 << category)))
6845                 {
6846                   found |= 1 << category;
6847                   id = coding_categories[category].id;
6848                   val = Fcons (make_number (id), val);
6849                 }
6850             }
6851           for (i = coding_category_raw_text - 1; i >= 0; i--)
6852             {
6853               category = coding_priorities[i];
6854               if (detect_info.found & (1 << category))
6855                 {
6856                   id = coding_categories[category].id;
6857                   val = Fcons (make_number (id), val);
6858                 }
6859             }
6860           detect_info.found |= found;
6861         }
6862     }
6863   else
6864     {
6865       detect_info.found = 1 << XINT (CODING_ATTR_CATEGORY (attrs));
6866       val = Fcons (make_number (coding.id), Qnil);
6867     }
6868
6869   /* Then, detect eol-format if necessary.  */
6870   {
6871     int normal_eol = -1, utf_16_be_eol = -1, utf_16_le_eol;
6872     Lisp_Object tail;
6873
6874     if (VECTORP (eol_type))
6875       {
6876         if (detect_info.found & ~CATEGORY_MASK_UTF_16)
6877           normal_eol = detect_eol (coding.source, src_bytes,
6878                                    coding_category_raw_text);
6879         if (detect_info.found & (CATEGORY_MASK_UTF_16_BE
6880                                  | CATEGORY_MASK_UTF_16_BE_NOSIG))
6881           utf_16_be_eol = detect_eol (coding.source, src_bytes,
6882                                       coding_category_utf_16_be);
6883         if (detect_info.found & (CATEGORY_MASK_UTF_16_LE
6884                                  | CATEGORY_MASK_UTF_16_LE_NOSIG))
6885           utf_16_le_eol = detect_eol (coding.source, src_bytes,
6886                                       coding_category_utf_16_le);
6887       }
6888     else
6889       {
6890         if (EQ (eol_type, Qunix))
6891           normal_eol = utf_16_be_eol = utf_16_le_eol = EOL_SEEN_LF;
6892         else if (EQ (eol_type, Qdos))
6893           normal_eol = utf_16_be_eol = utf_16_le_eol = EOL_SEEN_CRLF;
6894         else
6895           normal_eol = utf_16_be_eol = utf_16_le_eol = EOL_SEEN_CR;
6896       }
6897
6898     for (tail = val; CONSP (tail); tail = XCDR (tail))
6899       {
6900         enum coding_category category;
6901         int this_eol;
6902
6903         id = XINT (XCAR (tail));
6904         attrs = CODING_ID_ATTRS (id);
6905         category = XINT (CODING_ATTR_CATEGORY (attrs));
6906         eol_type = CODING_ID_EOL_TYPE (id);
6907         if (VECTORP (eol_type))
6908           {
6909             if (category == coding_category_utf_16_be
6910                 || category == coding_category_utf_16_be_nosig)
6911               this_eol = utf_16_be_eol;
6912             else if (category == coding_category_utf_16_le
6913                      || category == coding_category_utf_16_le_nosig)
6914               this_eol = utf_16_le_eol;
6915             else
6916               this_eol = normal_eol;
6917
6918             if (this_eol == EOL_SEEN_LF)
6919               XSETCAR (tail, AREF (eol_type, 0));
6920             else if (this_eol == EOL_SEEN_CRLF)
6921               XSETCAR (tail, AREF (eol_type, 1));
6922             else if (this_eol == EOL_SEEN_CR)
6923               XSETCAR (tail, AREF (eol_type, 2));
6924             else
6925               XSETCAR (tail, CODING_ID_NAME (id));
6926           }
6927         else
6928           XSETCAR (tail, CODING_ID_NAME (id));
6929       }
6930   }
6931
6932   return (highest ? XCAR (val) : val);
6933 }
6934
6935
6936 DEFUN ("detect-coding-region", Fdetect_coding_region, Sdetect_coding_region,
6937        2, 3, 0,
6938        doc: /* Detect coding system of the text in the region between START and END.
6939 Return a list of possible coding systems ordered by priority.
6940
6941 If only ASCII characters are found, it returns a list of single element
6942 `undecided' or its subsidiary coding system according to a detected
6943 end-of-line format.
6944
6945 If optional argument HIGHEST is non-nil, return the coding system of
6946 highest priority.  */)
6947      (start, end, highest)
6948      Lisp_Object start, end, highest;
6949 {
6950   int from, to;
6951   int from_byte, to_byte;
6952
6953   CHECK_NUMBER_COERCE_MARKER (start);
6954   CHECK_NUMBER_COERCE_MARKER (end);
6955
6956   validate_region (&start, &end);
6957   from = XINT (start), to = XINT (end);
6958   from_byte = CHAR_TO_BYTE (from);
6959   to_byte = CHAR_TO_BYTE (to);
6960
6961   if (from < GPT && to >= GPT)
6962     move_gap_both (to, to_byte);
6963
6964   return detect_coding_system (BYTE_POS_ADDR (from_byte),
6965                                to_byte - from_byte,
6966                                !NILP (highest),
6967                                !NILP (current_buffer
6968                                       ->enable_multibyte_characters),
6969                                Qnil);
6970 }
6971
6972 DEFUN ("detect-coding-string", Fdetect_coding_string, Sdetect_coding_string,
6973        1, 2, 0,
6974        doc: /* Detect coding system of the text in STRING.
6975 Return a list of possible coding systems ordered by priority.
6976
6977 If only ASCII characters are found, it returns a list of single element
6978 `undecided' or its subsidiary coding system according to a detected
6979 end-of-line format.
6980
6981 If optional argument HIGHEST is non-nil, return the coding system of
6982 highest priority.  */)
6983      (string, highest)
6984      Lisp_Object string, highest;
6985 {
6986   CHECK_STRING (string);
6987
6988   return detect_coding_system (XSTRING (string)->data,
6989                                STRING_BYTES (XSTRING (string)),
6990                                !NILP (highest),
6991                                STRING_MULTIBYTE (string),
6992                                Qnil);
6993 }
6994
6995
6996 static INLINE int
6997 char_encodable_p (c, attrs)
6998      int c;
6999      Lisp_Object attrs;
7000 {
7001   Lisp_Object tail;
7002   struct charset *charset;
7003
7004   for (tail = CODING_ATTR_CHARSET_LIST (attrs);
7005        CONSP (tail); tail = XCDR (tail))
7006     {
7007       charset = CHARSET_FROM_ID (XINT (XCAR (tail)));
7008       if (CHAR_CHARSET_P (c, charset))
7009         break;
7010     }
7011   return (! NILP (tail));
7012 }
7013
7014
7015 /* Return a list of coding systems that safely encode the text between
7016    START and END.  If EXCLUDE is non-nil, it is a list of coding
7017    systems not to check.  The returned list doesn't contain any such
7018    coding systems.  In any case, if the text contains only ASCII or is
7019    unibyte, return t.  */
7020
7021 DEFUN ("find-coding-systems-region-internal",
7022        Ffind_coding_systems_region_internal,
7023        Sfind_coding_systems_region_internal, 2, 3, 0,
7024        doc: /* Internal use only.  */)
7025      (start, end, exclude)
7026      Lisp_Object start, end, exclude;
7027 {
7028   Lisp_Object coding_attrs_list, safe_codings;
7029   EMACS_INT start_byte, end_byte;
7030   const unsigned char *p, *pbeg, *pend;
7031   int c;
7032   Lisp_Object tail, elt;
7033
7034   if (STRINGP (start))
7035     {
7036       if (!STRING_MULTIBYTE (start)
7037           || XSTRING (start)->size == STRING_BYTES (XSTRING (start)))
7038         return Qt;
7039       start_byte = 0;
7040       end_byte = STRING_BYTES (XSTRING (start));
7041     }
7042   else
7043     {
7044       CHECK_NUMBER_COERCE_MARKER (start);
7045       CHECK_NUMBER_COERCE_MARKER (end);
7046       if (XINT (start) < BEG || XINT (end) > Z || XINT (start) > XINT (end))
7047         args_out_of_range (start, end);
7048       if (NILP (current_buffer->enable_multibyte_characters))
7049         return Qt;
7050       start_byte = CHAR_TO_BYTE (XINT (start));
7051       end_byte = CHAR_TO_BYTE (XINT (end));
7052       if (XINT (end) - XINT (start) == end_byte - start_byte)
7053         return Qt;
7054
7055       if (XINT (start) < GPT && XINT (end) > GPT)
7056         {
7057           if ((GPT - XINT (start)) < (XINT (end) - GPT))
7058             move_gap_both (XINT (start), start_byte);
7059           else
7060             move_gap_both (XINT (end), end_byte);
7061         }
7062     }
7063
7064   coding_attrs_list = Qnil;
7065   for (tail = Vcoding_system_list; CONSP (tail); tail = XCDR (tail))
7066     if (NILP (exclude)
7067         || NILP (Fmemq (XCAR (tail), exclude)))
7068       {
7069         Lisp_Object attrs;
7070
7071         attrs = AREF (CODING_SYSTEM_SPEC (XCAR (tail)), 0);
7072         if (EQ (XCAR (tail), CODING_ATTR_BASE_NAME (attrs))
7073             && ! EQ (CODING_ATTR_TYPE (attrs), Qundecided))
7074           coding_attrs_list = Fcons (attrs, coding_attrs_list);
7075       }
7076
7077   if (STRINGP (start))
7078     p = pbeg = XSTRING (start)->data;
7079   else
7080     p = pbeg = BYTE_POS_ADDR (start_byte);
7081   pend = p + (end_byte - start_byte);
7082
7083   while (p < pend && ASCII_BYTE_P (*p)) p++;
7084   while (p < pend && ASCII_BYTE_P (*(pend - 1))) pend--;
7085
7086   while (p < pend)
7087     {
7088       if (ASCII_BYTE_P (*p))
7089         p++;
7090       else
7091         {
7092           c = STRING_CHAR_ADVANCE (p);
7093
7094           charset_map_loaded = 0;
7095           for (tail = coding_attrs_list; CONSP (tail);)
7096             {
7097               elt = XCAR (tail);
7098               if (NILP (elt))
7099                 tail = XCDR (tail);
7100               else if (char_encodable_p (c, elt))
7101                 tail = XCDR (tail);
7102               else if (CONSP (XCDR (tail)))
7103                 {
7104                   XSETCAR (tail, XCAR (XCDR (tail)));
7105                   XSETCDR (tail, XCDR (XCDR (tail)));
7106                 }
7107               else
7108                 {
7109                   XSETCAR (tail, Qnil);
7110                   tail = XCDR (tail);
7111                 }
7112             }
7113           if (charset_map_loaded)
7114             {
7115               EMACS_INT p_offset = p - pbeg, pend_offset = pend - pbeg;
7116
7117               if (STRINGP (start))
7118                 pbeg = XSTRING (start)->data;
7119               else
7120                 pbeg = BYTE_POS_ADDR (start_byte);
7121               p = pbeg + p_offset;
7122               pend = pbeg + pend_offset;
7123             }
7124         }
7125     }
7126
7127   safe_codings = Qnil;
7128   for (tail = coding_attrs_list; CONSP (tail); tail = XCDR (tail))
7129     if (! NILP (XCAR (tail)))
7130       safe_codings = Fcons (CODING_ATTR_BASE_NAME (XCAR (tail)), safe_codings);
7131
7132   return safe_codings;
7133 }
7134
7135
7136 DEFUN ("check-coding-systems-region", Fcheck_coding_systems_region,
7137        Scheck_coding_systems_region, 3, 3, 0,
7138        doc: /* Check if the region is encodable by coding systems.
7139
7140 START and END are buffer positions specifying the region.
7141 CODING-SYSTEM-LIST is a list of coding systems to check.
7142
7143 The value is an alist ((CODING-SYSTEM POS0 POS1 ...) ...), where
7144 CODING-SYSTEM is a member of CODING-SYSTEM-LIst and can't encode the
7145 whole region, POS0, POS1, ... are buffer positions where non-encodable
7146 characters are found.
7147
7148 If all coding systems in CODING-SYSTEM-LIST can encode the region, the
7149 value is nil.
7150
7151 START may be a string.  In that case, check if the string is
7152 encodable, and the value contains indices to the string instead of
7153 buffer positions.  END is ignored.  */)
7154      (start, end, coding_system_list)
7155      Lisp_Object start, end, coding_system_list;
7156 {
7157   Lisp_Object list;
7158   EMACS_INT start_byte, end_byte;
7159   int pos;
7160   const unsigned char *p, *pbeg, *pend;
7161   int c;
7162   Lisp_Object tail, elt;
7163
7164   if (STRINGP (start))
7165     {
7166       if (!STRING_MULTIBYTE (start)
7167           && XSTRING (start)->size != STRING_BYTES (XSTRING (start)))
7168         return Qnil;
7169       start_byte = 0;
7170       end_byte = STRING_BYTES (XSTRING (start));
7171       pos = 0;
7172     }
7173   else
7174     {
7175       CHECK_NUMBER_COERCE_MARKER (start);
7176       CHECK_NUMBER_COERCE_MARKER (end);
7177       if (XINT (start) < BEG || XINT (end) > Z || XINT (start) > XINT (end))
7178         args_out_of_range (start, end);
7179       if (NILP (current_buffer->enable_multibyte_characters))
7180         return Qnil;
7181       start_byte = CHAR_TO_BYTE (XINT (start));
7182       end_byte = CHAR_TO_BYTE (XINT (end));
7183       if (XINT (end) - XINT (start) == end_byte - start_byte)
7184         return Qt;
7185
7186       if (XINT (start) < GPT && XINT (end) > GPT)
7187         {
7188           if ((GPT - XINT (start)) < (XINT (end) - GPT))
7189             move_gap_both (XINT (start), start_byte);
7190           else
7191             move_gap_both (XINT (end), end_byte);
7192         }
7193       pos = XINT (start);
7194     }
7195
7196   list = Qnil;
7197   for (tail = coding_system_list; CONSP (tail); tail = XCDR (tail))
7198     {
7199       elt = XCAR (tail);
7200       list = Fcons (Fcons (elt, Fcons (AREF (CODING_SYSTEM_SPEC (elt), 0),
7201                                        Qnil)),
7202                     list);
7203     }
7204
7205   if (STRINGP (start))
7206     p = pbeg = XSTRING (start)->data;
7207   else
7208     p = pbeg = BYTE_POS_ADDR (start_byte);
7209   pend = p + (end_byte - start_byte);
7210
7211   while (p < pend && ASCII_BYTE_P (*p)) p++, pos++;
7212   while (p < pend && ASCII_BYTE_P (*(pend - 1))) pend--;
7213
7214   while (p < pend)
7215     {
7216       if (ASCII_BYTE_P (*p))
7217         p++;
7218       else
7219         {
7220           c = STRING_CHAR_ADVANCE (p);
7221
7222           charset_map_loaded = 0;
7223           for (tail = list; CONSP (tail); tail = XCDR (tail))
7224             {
7225               elt = XCDR (XCAR (tail));
7226               if (! char_encodable_p (c, XCAR (elt)))
7227                 XSETCDR (elt, Fcons (make_number (pos), XCDR (elt)));
7228             }
7229           if (charset_map_loaded)
7230             {
7231               EMACS_INT p_offset = p - pbeg, pend_offset = pend - pbeg;
7232
7233               if (STRINGP (start))
7234                 pbeg = XSTRING (start)->data;
7235               else
7236                 pbeg = BYTE_POS_ADDR (start_byte);
7237               p = pbeg + p_offset;
7238               pend = pbeg + pend_offset;
7239             }
7240         }
7241       pos++;
7242     }
7243
7244   tail = list;
7245   list = Qnil;
7246   for (; CONSP (tail); tail = XCDR (tail))
7247     {
7248       elt = XCAR (tail);
7249       if (CONSP (XCDR (XCDR (elt))))
7250         list = Fcons (Fcons (XCAR (elt), Fnreverse (XCDR (XCDR (elt)))),
7251                       list);
7252     }
7253
7254   return list;
7255 }
7256
7257
7258
7259 Lisp_Object
7260 code_convert_region (start, end, coding_system, dst_object, encodep, norecord)
7261      Lisp_Object start, end, coding_system, dst_object;
7262      int encodep, norecord;
7263 {
7264   struct coding_system coding;
7265   EMACS_INT from, from_byte, to, to_byte;
7266   Lisp_Object src_object;
7267
7268   CHECK_NUMBER_COERCE_MARKER (start);
7269   CHECK_NUMBER_COERCE_MARKER (end);
7270   if (NILP (coding_system))
7271     coding_system = Qno_conversion;
7272   else
7273     CHECK_CODING_SYSTEM (coding_system);
7274   src_object = Fcurrent_buffer ();
7275   if (NILP (dst_object))
7276     dst_object = src_object;
7277   else if (! EQ (dst_object, Qt))
7278     CHECK_BUFFER (dst_object);
7279
7280   validate_region (&start, &end);
7281   from = XFASTINT (start);
7282   from_byte = CHAR_TO_BYTE (from);
7283   to = XFASTINT (end);
7284   to_byte = CHAR_TO_BYTE (to);
7285
7286   setup_coding_system (coding_system, &coding);
7287   coding.mode |= CODING_MODE_LAST_BLOCK;
7288
7289   if (encodep)
7290     encode_coding_object (&coding, src_object, from, from_byte, to, to_byte,
7291                           dst_object);
7292   else
7293     decode_coding_object (&coding, src_object, from, from_byte, to, to_byte,
7294                           dst_object);
7295   if (! norecord)
7296     Vlast_coding_system_used = CODING_ID_NAME (coding.id);
7297
7298   if (coding.result != CODING_RESULT_SUCCESS)
7299     error ("Code conversion error: %d", coding.result);
7300
7301   return (BUFFERP (dst_object)
7302           ? make_number (coding.produced_char)
7303           : coding.dst_object);
7304 }
7305
7306
7307 DEFUN ("decode-coding-region", Fdecode_coding_region, Sdecode_coding_region,
7308        3, 4, "r\nzCoding system: ",
7309        doc: /* Decode the current region from the specified coding system.
7310 When called from a program, takes four arguments:
7311         START, END, CODING-SYSTEM, and DESTINATION.
7312 START and END are buffer positions.
7313
7314 Optional 4th arguments DESTINATION specifies where the decoded text goes.
7315 If nil, the region between START and END is replace by the decoded text.
7316 If buffer, the decoded text is inserted in the buffer.
7317 If t, the decoded text is returned.
7318
7319 This function sets `last-coding-system-used' to the precise coding system
7320 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
7321 not fully specified.)
7322 It returns the length of the decoded text.  */)
7323      (start, end, coding_system, destination)
7324      Lisp_Object start, end, coding_system, destination;
7325 {
7326   return code_convert_region (start, end, coding_system, destination, 0, 0);
7327 }
7328
7329 DEFUN ("encode-coding-region", Fencode_coding_region, Sencode_coding_region,
7330        3, 4, "r\nzCoding system: ",
7331        doc: /* Encode the current region by specified coding system.
7332 When called from a program, takes three arguments:
7333 START, END, and CODING-SYSTEM.  START and END are buffer positions.
7334
7335 Optional 4th arguments DESTINATION specifies where the encoded text goes.
7336 If nil, the region between START and END is replace by the encoded text.
7337 If buffer, the encoded text is inserted in the buffer.
7338 If t, the encoded text is returned.
7339
7340 This function sets `last-coding-system-used' to the precise coding system
7341 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
7342 not fully specified.)
7343 It returns the length of the encoded text.  */)
7344   (start, end, coding_system, destination)
7345      Lisp_Object start, end, coding_system, destination;
7346 {
7347   return code_convert_region (start, end, coding_system, destination, 1, 0);
7348 }
7349
7350 Lisp_Object
7351 code_convert_string (string, coding_system, dst_object,
7352                      encodep, nocopy, norecord)
7353      Lisp_Object string, coding_system, dst_object;
7354      int encodep, nocopy, norecord;
7355 {
7356   struct coding_system coding;
7357   EMACS_INT chars, bytes;
7358
7359   CHECK_STRING (string);
7360   if (NILP (coding_system))
7361     {
7362       if (! norecord)
7363         Vlast_coding_system_used = Qno_conversion;
7364       if (NILP (dst_object))
7365         return (nocopy ? Fcopy_sequence (string) : string);
7366     }
7367
7368   if (NILP (coding_system))
7369     coding_system = Qno_conversion;
7370   else
7371     CHECK_CODING_SYSTEM (coding_system);
7372   if (NILP (dst_object))
7373     dst_object = Qt;
7374   else if (! EQ (dst_object, Qt))
7375     CHECK_BUFFER (dst_object);
7376
7377   setup_coding_system (coding_system, &coding);
7378   coding.mode |= CODING_MODE_LAST_BLOCK;
7379   chars = XSTRING (string)->size;
7380   bytes = STRING_BYTES (XSTRING (string));
7381   if (encodep)
7382     encode_coding_object (&coding, string, 0, 0, chars, bytes, dst_object);
7383   else
7384     decode_coding_object (&coding, string, 0, 0, chars, bytes, dst_object);
7385   if (! norecord)
7386     Vlast_coding_system_used = CODING_ID_NAME (coding.id);
7387
7388   if (coding.result != CODING_RESULT_SUCCESS)
7389     error ("Code conversion error: %d", coding.result);
7390
7391   return (BUFFERP (dst_object)
7392           ? make_number (coding.produced_char)
7393           : coding.dst_object);
7394 }
7395
7396
7397 /* Encode or decode STRING according to CODING_SYSTEM.
7398    Do not set Vlast_coding_system_used.
7399
7400    This function is called only from macros DECODE_FILE and
7401    ENCODE_FILE, thus we ignore character composition.  */
7402
7403 Lisp_Object
7404 code_convert_string_norecord (string, coding_system, encodep)
7405      Lisp_Object string, coding_system;
7406      int encodep;
7407 {
7408   return code_convert_string (string, coding_system, Qt, encodep, 0, 1);
7409 }
7410
7411
7412 DEFUN ("decode-coding-string", Fdecode_coding_string, Sdecode_coding_string,
7413        2, 4, 0,
7414        doc: /* Decode STRING which is encoded in CODING-SYSTEM, and return the result.
7415
7416 Optional third arg NOCOPY non-nil means it is OK to return STRING itself
7417 if the decoding operation is trivial.
7418
7419 Optional fourth arg BUFFER non-nil meant that the decoded text is
7420 inserted in BUFFER instead of returned as a string.  In this case,
7421 the return value is BUFFER.
7422
7423 This function sets `last-coding-system-used' to the precise coding system
7424 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
7425 not fully specified.  */)
7426   (string, coding_system, nocopy, buffer)
7427      Lisp_Object string, coding_system, nocopy, buffer;
7428 {
7429   return code_convert_string (string, coding_system, buffer,
7430                               0, ! NILP (nocopy), 0);
7431 }
7432
7433 DEFUN ("encode-coding-string", Fencode_coding_string, Sencode_coding_string,
7434        2, 4, 0,
7435        doc: /* Encode STRING to CODING-SYSTEM, and return the result.
7436
7437 Optional third arg NOCOPY non-nil means it is OK to return STRING
7438 itself if the encoding operation is trivial.
7439
7440 Optional fourth arg BUFFER non-nil meant that the encoded text is
7441 inserted in BUFFER instead of returned as a string.  In this case,
7442 the return value is BUFFER.
7443
7444 This function sets `last-coding-system-used' to the precise coding system
7445 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
7446 not fully specified.)  */)
7447      (string, coding_system, nocopy, buffer)
7448      Lisp_Object string, coding_system, nocopy, buffer;
7449 {
7450   return code_convert_string (string, coding_system, buffer,
7451                               1, ! NILP (nocopy), 1);
7452 }
7453
7454 \f
7455 DEFUN ("decode-sjis-char", Fdecode_sjis_char, Sdecode_sjis_char, 1, 1, 0,
7456        doc: /* Decode a Japanese character which has CODE in shift_jis encoding.
7457 Return the corresponding character.  */)
7458      (code)
7459      Lisp_Object code;
7460 {
7461   Lisp_Object spec, attrs, val;
7462   struct charset *charset_roman, *charset_kanji, *charset_kana, *charset;
7463   int c;
7464
7465   CHECK_NATNUM (code);
7466   c = XFASTINT (code);
7467   CHECK_CODING_SYSTEM_GET_SPEC (Vsjis_coding_system, spec);
7468   attrs = AREF (spec, 0);
7469
7470   if (ASCII_BYTE_P (c)
7471       && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
7472     return code;
7473
7474   val = CODING_ATTR_CHARSET_LIST (attrs);
7475   charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
7476   charset_kana = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
7477   charset_kanji = CHARSET_FROM_ID (XINT (XCAR (val)));
7478
7479   if (c <= 0x7F)
7480     charset = charset_roman;
7481   else if (c >= 0xA0 && c < 0xDF)
7482     {
7483       charset = charset_kana;
7484       c -= 0x80;
7485     }
7486   else
7487     {
7488       int s1 = c >> 8, s2 = c & 0xFF;
7489
7490       if (s1 < 0x81 || (s1 > 0x9F && s1 < 0xE0) || s1 > 0xEF
7491           || s2 < 0x40 || s2 == 0x7F || s2 > 0xFC)
7492         error ("Invalid code: %d", code);
7493       SJIS_TO_JIS (c);
7494       charset = charset_kanji;
7495     }
7496   c = DECODE_CHAR (charset, c);
7497   if (c < 0)
7498     error ("Invalid code: %d", code);
7499   return make_number (c);
7500 }
7501
7502
7503 DEFUN ("encode-sjis-char", Fencode_sjis_char, Sencode_sjis_char, 1, 1, 0,
7504        doc: /* Encode a Japanese character CHAR to shift_jis encoding.
7505 Return the corresponding code in SJIS.  */)
7506      (ch)
7507     Lisp_Object ch;
7508 {
7509   Lisp_Object spec, attrs, charset_list;
7510   int c;
7511   struct charset *charset;
7512   unsigned code;
7513
7514   CHECK_CHARACTER (ch);
7515   c = XFASTINT (ch);
7516   CHECK_CODING_SYSTEM_GET_SPEC (Vsjis_coding_system, spec);
7517   attrs = AREF (spec, 0);
7518
7519   if (ASCII_CHAR_P (c)
7520       && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
7521     return ch;
7522
7523   charset_list = CODING_ATTR_CHARSET_LIST (attrs);
7524   charset = char_charset (c, charset_list, &code);
7525   if (code == CHARSET_INVALID_CODE (charset))
7526     error ("Can't encode by shift_jis encoding: %d", c);
7527   JIS_TO_SJIS (code);
7528
7529   return make_number (code);
7530 }
7531
7532 DEFUN ("decode-big5-char", Fdecode_big5_char, Sdecode_big5_char, 1, 1, 0,
7533        doc: /* Decode a Big5 character which has CODE in BIG5 coding system.
7534 Return the corresponding character.  */)
7535      (code)
7536      Lisp_Object code;
7537 {
7538   Lisp_Object spec, attrs, val;
7539   struct charset *charset_roman, *charset_big5, *charset;
7540   int c;
7541
7542   CHECK_NATNUM (code);
7543   c = XFASTINT (code);
7544   CHECK_CODING_SYSTEM_GET_SPEC (Vbig5_coding_system, spec);
7545   attrs = AREF (spec, 0);
7546
7547   if (ASCII_BYTE_P (c)
7548       && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
7549     return code;
7550
7551   val = CODING_ATTR_CHARSET_LIST (attrs);
7552   charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
7553   charset_big5 = CHARSET_FROM_ID (XINT (XCAR (val)));
7554
7555   if (c <= 0x7F)
7556     charset = charset_roman;
7557   else
7558     {
7559       int b1 = c >> 8, b2 = c & 0x7F;
7560       if (b1 < 0xA1 || b1 > 0xFE
7561           || b2 < 0x40 || (b2 > 0x7E && b2 < 0xA1) || b2 > 0xFE)
7562         error ("Invalid code: %d", code);
7563       charset = charset_big5;
7564     }
7565   c = DECODE_CHAR (charset, (unsigned )c);
7566   if (c < 0)
7567     error ("Invalid code: %d", code);
7568   return make_number (c);
7569 }
7570
7571 DEFUN ("encode-big5-char", Fencode_big5_char, Sencode_big5_char, 1, 1, 0,
7572        doc: /* Encode the Big5 character CHAR to BIG5 coding system.
7573 Return the corresponding character code in Big5.  */)
7574      (ch)
7575      Lisp_Object ch;
7576 {
7577   Lisp_Object spec, attrs, charset_list;
7578   struct charset *charset;
7579   int c;
7580   unsigned code;
7581
7582   CHECK_CHARACTER (ch);
7583   c = XFASTINT (ch);
7584   CHECK_CODING_SYSTEM_GET_SPEC (Vbig5_coding_system, spec);
7585   attrs = AREF (spec, 0);
7586   if (ASCII_CHAR_P (c)
7587       && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
7588     return ch;
7589
7590   charset_list = CODING_ATTR_CHARSET_LIST (attrs);
7591   charset = char_charset (c, charset_list, &code);
7592   if (code == CHARSET_INVALID_CODE (charset))
7593     error ("Can't encode by Big5 encoding: %d", c);
7594
7595   return make_number (code);
7596 }
7597
7598 \f
7599 DEFUN ("set-terminal-coding-system-internal",
7600        Fset_terminal_coding_system_internal,
7601        Sset_terminal_coding_system_internal, 1, 1, 0,
7602        doc: /* Internal use only.  */)
7603      (coding_system)
7604      Lisp_Object coding_system;
7605 {
7606   CHECK_SYMBOL (coding_system);
7607   setup_coding_system (Fcheck_coding_system (coding_system),
7608                         &terminal_coding);
7609
7610   /* We had better not send unsafe characters to terminal.  */
7611   terminal_coding.mode |= CODING_MODE_SAFE_ENCODING;
7612   /* Characer composition should be disabled.  */
7613   terminal_coding.common_flags &= ~CODING_ANNOTATE_COMPOSITION_MASK;
7614   terminal_coding.src_multibyte = 1;
7615   terminal_coding.dst_multibyte = 0;
7616   return Qnil;
7617 }
7618
7619 DEFUN ("set-safe-terminal-coding-system-internal",
7620        Fset_safe_terminal_coding_system_internal,
7621        Sset_safe_terminal_coding_system_internal, 1, 1, 0,
7622        doc: /* Internal use only.  */)
7623      (coding_system)
7624      Lisp_Object coding_system;
7625 {
7626   CHECK_SYMBOL (coding_system);
7627   setup_coding_system (Fcheck_coding_system (coding_system),
7628                        &safe_terminal_coding);
7629   /* Characer composition should be disabled.  */
7630   safe_terminal_coding.common_flags &= ~CODING_ANNOTATE_COMPOSITION_MASK;
7631   safe_terminal_coding.src_multibyte = 1;
7632   safe_terminal_coding.dst_multibyte = 0;
7633   return Qnil;
7634 }
7635
7636 DEFUN ("terminal-coding-system",
7637        Fterminal_coding_system, Sterminal_coding_system, 0, 0, 0,
7638        doc: /* Return coding system specified for terminal output.  */)
7639      ()
7640 {
7641   return CODING_ID_NAME (terminal_coding.id);
7642 }
7643
7644 DEFUN ("set-keyboard-coding-system-internal",
7645        Fset_keyboard_coding_system_internal,
7646        Sset_keyboard_coding_system_internal, 1, 1, 0,
7647        doc: /* Internal use only.  */)
7648      (coding_system)
7649      Lisp_Object coding_system;
7650 {
7651   CHECK_SYMBOL (coding_system);
7652   setup_coding_system (Fcheck_coding_system (coding_system),
7653                        &keyboard_coding);
7654   /* Characer composition should be disabled.  */
7655   keyboard_coding.common_flags &= ~CODING_ANNOTATE_COMPOSITION_MASK;
7656   return Qnil;
7657 }
7658
7659 DEFUN ("keyboard-coding-system",
7660        Fkeyboard_coding_system, Skeyboard_coding_system, 0, 0, 0,
7661        doc: /* Return coding system specified for decoding keyboard input.  */)
7662      ()
7663 {
7664   return CODING_ID_NAME (keyboard_coding.id);
7665 }
7666
7667 \f
7668 DEFUN ("find-operation-coding-system", Ffind_operation_coding_system,
7669        Sfind_operation_coding_system,  1, MANY, 0,
7670        doc: /* Choose a coding system for an operation based on the target name.
7671 The value names a pair of coding systems: (DECODING-SYSTEM . ENCODING-SYSTEM).
7672 DECODING-SYSTEM is the coding system to use for decoding
7673 \(in case OPERATION does decoding), and ENCODING-SYSTEM is the coding system
7674 for encoding (in case OPERATION does encoding).
7675
7676 The first argument OPERATION specifies an I/O primitive:
7677   For file I/O, `insert-file-contents' or `write-region'.
7678   For process I/O, `call-process', `call-process-region', or `start-process'.
7679   For network I/O, `open-network-stream'.
7680
7681 The remaining arguments should be the same arguments that were passed
7682 to the primitive.  Depending on which primitive, one of those arguments
7683 is selected as the TARGET.  For example, if OPERATION does file I/O,
7684 whichever argument specifies the file name is TARGET.
7685
7686 TARGET has a meaning which depends on OPERATION:
7687   For file I/O, TARGET is a file name.
7688   For process I/O, TARGET is a process name.
7689   For network I/O, TARGET is a service name or a port number
7690
7691 This function looks up what specified for TARGET in,
7692 `file-coding-system-alist', `process-coding-system-alist',
7693 or `network-coding-system-alist' depending on OPERATION.
7694 They may specify a coding system, a cons of coding systems,
7695 or a function symbol to call.
7696 In the last case, we call the function with one argument,
7697 which is a list of all the arguments given to this function.
7698
7699 usage: (find-operation-coding-system OPERATION ARGUMENTS ...)  */)
7700      (nargs, args)
7701      int nargs;
7702      Lisp_Object *args;
7703 {
7704   Lisp_Object operation, target_idx, target, val;
7705   register Lisp_Object chain;
7706
7707   if (nargs < 2)
7708     error ("Too few arguments");
7709   operation = args[0];
7710   if (!SYMBOLP (operation)
7711       || !INTEGERP (target_idx = Fget (operation, Qtarget_idx)))
7712     error ("Invalid first arguement");
7713   if (nargs < 1 + XINT (target_idx))
7714     error ("Too few arguments for operation: %s",
7715            XSYMBOL (operation)->name->data);
7716   target = args[XINT (target_idx) + 1];
7717   if (!(STRINGP (target)
7718         || (EQ (operation, Qopen_network_stream) && INTEGERP (target))))
7719     error ("Invalid %dth argument", XINT (target_idx) + 1);
7720
7721   chain = ((EQ (operation, Qinsert_file_contents)
7722             || EQ (operation, Qwrite_region))
7723            ? Vfile_coding_system_alist
7724            : (EQ (operation, Qopen_network_stream)
7725               ? Vnetwork_coding_system_alist
7726               : Vprocess_coding_system_alist));
7727   if (NILP (chain))
7728     return Qnil;
7729
7730   for (; CONSP (chain); chain = XCDR (chain))
7731     {
7732       Lisp_Object elt;
7733
7734       elt = XCAR (chain);
7735       if (CONSP (elt)
7736           && ((STRINGP (target)
7737                && STRINGP (XCAR (elt))
7738                && fast_string_match (XCAR (elt), target) >= 0)
7739               || (INTEGERP (target) && EQ (target, XCAR (elt)))))
7740         {
7741           val = XCDR (elt);
7742           /* Here, if VAL is both a valid coding system and a valid
7743              function symbol, we return VAL as a coding system.  */
7744           if (CONSP (val))
7745             return val;
7746           if (! SYMBOLP (val))
7747             return Qnil;
7748           if (! NILP (Fcoding_system_p (val)))
7749             return Fcons (val, val);
7750           if (! NILP (Ffboundp (val)))
7751             {
7752               val = call1 (val, Flist (nargs, args));
7753               if (CONSP (val))
7754                 return val;
7755               if (SYMBOLP (val) && ! NILP (Fcoding_system_p (val)))
7756                 return Fcons (val, val);
7757             }
7758           return Qnil;
7759         }
7760     }
7761   return Qnil;
7762 }
7763
7764 DEFUN ("set-coding-system-priority", Fset_coding_system_priority,
7765        Sset_coding_system_priority, 0, MANY, 0,
7766        doc: /* Assign higher priority to the coding systems given as arguments.
7767 If multiple coding systems belongs to the same category,
7768 all but the first one are ignored.  */)
7769      (nargs, args)
7770      int nargs;
7771      Lisp_Object *args;
7772 {
7773   int i, j;
7774   int changed[coding_category_max];
7775   enum coding_category priorities[coding_category_max];
7776
7777   bzero (changed, sizeof changed);
7778
7779   for (i = j = 0; i < nargs; i++)
7780     {
7781       enum coding_category category;
7782       Lisp_Object spec, attrs;
7783
7784       CHECK_CODING_SYSTEM_GET_SPEC (args[i], spec);
7785       attrs = AREF (spec, 0);
7786       category = XINT (CODING_ATTR_CATEGORY (attrs));
7787       if (changed[category])
7788         /* Ignore this coding system because a coding system of the
7789            same category already had a higher priority.  */
7790         continue;
7791       changed[category] = 1;
7792       priorities[j++] = category;
7793       if (coding_categories[category].id >= 0
7794           && ! EQ (args[i], CODING_ID_NAME (coding_categories[category].id)))
7795         setup_coding_system (args[i], &coding_categories[category]);
7796       Fset (AREF (Vcoding_category_table, category), args[i]);
7797     }
7798
7799   /* Now we have decided top J priorities.  Reflect the order of the
7800      original priorities to the remaining priorities.  */
7801
7802   for (i = j, j = 0; i < coding_category_max; i++, j++)
7803     {
7804       while (j < coding_category_max
7805              && changed[coding_priorities[j]])
7806         j++;
7807       if (j == coding_category_max)
7808         abort ();
7809       priorities[i] = coding_priorities[j];
7810     }
7811
7812   bcopy (priorities, coding_priorities, sizeof priorities);
7813
7814   /* Update `coding-category-list'.  */
7815   Vcoding_category_list = Qnil;
7816   for (i = coding_category_max - 1; i >= 0; i--)
7817     Vcoding_category_list
7818       = Fcons (AREF (Vcoding_category_table, priorities[i]),
7819                Vcoding_category_list);
7820
7821   return Qnil;
7822 }
7823
7824 DEFUN ("coding-system-priority-list", Fcoding_system_priority_list,
7825        Scoding_system_priority_list, 0, 1, 0,
7826        doc: /* Return a list of coding systems ordered by their priorities.
7827 HIGHESTP non-nil means just return the highest priority one.  */)
7828      (highestp)
7829      Lisp_Object highestp;
7830 {
7831   int i;
7832   Lisp_Object val;
7833
7834   for (i = 0, val = Qnil; i < coding_category_max; i++)
7835     {
7836       enum coding_category category = coding_priorities[i];
7837       int id = coding_categories[category].id;
7838       Lisp_Object attrs;
7839
7840       if (id < 0)
7841         continue;
7842       attrs = CODING_ID_ATTRS (id);
7843       if (! NILP (highestp))
7844         return CODING_ATTR_BASE_NAME (attrs);
7845       val = Fcons (CODING_ATTR_BASE_NAME (attrs), val);
7846     }
7847   return Fnreverse (val);
7848 }
7849
7850 static char *suffixes[] = { "-unix", "-dos", "-mac" };
7851
7852 static Lisp_Object
7853 make_subsidiaries (base)
7854      Lisp_Object base;
7855 {
7856   Lisp_Object subsidiaries;
7857   int base_name_len = STRING_BYTES (XSYMBOL (base)->name);
7858   char *buf = (char *) alloca (base_name_len + 6);
7859   int i;
7860
7861   bcopy (XSYMBOL (base)->name->data, buf, base_name_len);
7862   subsidiaries = Fmake_vector (make_number (3), Qnil);
7863   for (i = 0; i < 3; i++)
7864     {
7865       bcopy (suffixes[i], buf + base_name_len, strlen (suffixes[i]) + 1);
7866       ASET (subsidiaries, i, intern (buf));
7867     }
7868   return subsidiaries;
7869 }
7870
7871
7872 DEFUN ("define-coding-system-internal", Fdefine_coding_system_internal,
7873        Sdefine_coding_system_internal, coding_arg_max, MANY, 0,
7874        doc: /* For internal use only.
7875 usage: (define-coding-system-internal ...)  */)
7876      (nargs, args)
7877      int nargs;
7878      Lisp_Object *args;
7879 {
7880   Lisp_Object name;
7881   Lisp_Object spec_vec;         /* [ ATTRS ALIASE EOL_TYPE ] */
7882   Lisp_Object attrs;            /* Vector of attributes.  */
7883   Lisp_Object eol_type;
7884   Lisp_Object aliases;
7885   Lisp_Object coding_type, charset_list, safe_charsets;
7886   enum coding_category category;
7887   Lisp_Object tail, val;
7888   int max_charset_id = 0;
7889   int i;
7890
7891   if (nargs < coding_arg_max)
7892     goto short_args;
7893
7894   attrs = Fmake_vector (make_number (coding_attr_last_index), Qnil);
7895
7896   name = args[coding_arg_name];
7897   CHECK_SYMBOL (name);
7898   CODING_ATTR_BASE_NAME (attrs) = name;
7899
7900   val = args[coding_arg_mnemonic];
7901   if (! STRINGP (val))
7902     CHECK_CHARACTER (val);
7903   CODING_ATTR_MNEMONIC (attrs) = val;
7904
7905   coding_type = args[coding_arg_coding_type];
7906   CHECK_SYMBOL (coding_type);
7907   CODING_ATTR_TYPE (attrs) = coding_type;
7908
7909   charset_list = args[coding_arg_charset_list];
7910   if (SYMBOLP (charset_list))
7911     {
7912       if (EQ (charset_list, Qiso_2022))
7913         {
7914           if (! EQ (coding_type, Qiso_2022))
7915             error ("Invalid charset-list");
7916           charset_list = Viso_2022_charset_list;
7917         }
7918       else if (EQ (charset_list, Qemacs_mule))
7919         {
7920           if (! EQ (coding_type, Qemacs_mule))
7921             error ("Invalid charset-list");
7922           charset_list = Vemacs_mule_charset_list;
7923         }
7924       for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
7925         if (max_charset_id < XFASTINT (XCAR (tail)))
7926           max_charset_id = XFASTINT (XCAR (tail));
7927     }
7928   else
7929     {
7930       charset_list = Fcopy_sequence (charset_list);
7931       for (tail = charset_list; !NILP (tail); tail = Fcdr (tail))
7932         {
7933           struct charset *charset;
7934
7935           val = Fcar (tail);
7936           CHECK_CHARSET_GET_CHARSET (val, charset);
7937           if (EQ (coding_type, Qiso_2022)
7938               ? CHARSET_ISO_FINAL (charset) < 0
7939               : EQ (coding_type, Qemacs_mule)
7940               ? CHARSET_EMACS_MULE_ID (charset) < 0
7941               : 0)
7942             error ("Can't handle charset `%s'",
7943                    XSYMBOL (CHARSET_NAME (charset))->name->data);
7944
7945           XCAR (tail) = make_number (charset->id);
7946           if (max_charset_id < charset->id)
7947             max_charset_id = charset->id;
7948         }
7949     }
7950   CODING_ATTR_CHARSET_LIST (attrs) = charset_list;
7951
7952   safe_charsets = Fmake_string (make_number (max_charset_id + 1),
7953                                 make_number (255));
7954   for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
7955     XSTRING (safe_charsets)->data[XFASTINT (XCAR (tail))] = 0;
7956   CODING_ATTR_SAFE_CHARSETS (attrs) = safe_charsets;
7957
7958   CODING_ATTR_ASCII_COMPAT (attrs) = args[coding_arg_ascii_compatible_p];
7959
7960   val = args[coding_arg_decode_translation_table];
7961   if (! NILP (val))
7962     CHECK_CHAR_TABLE (val);
7963   CODING_ATTR_DECODE_TBL (attrs) = val;
7964
7965   val = args[coding_arg_encode_translation_table];
7966   if (! NILP (val))
7967     CHECK_CHAR_TABLE (val);
7968   CODING_ATTR_ENCODE_TBL (attrs) = val;
7969
7970   val = args[coding_arg_post_read_conversion];
7971   CHECK_SYMBOL (val);
7972   CODING_ATTR_POST_READ (attrs) = val;
7973
7974   val = args[coding_arg_pre_write_conversion];
7975   CHECK_SYMBOL (val);
7976   CODING_ATTR_PRE_WRITE (attrs) = val;
7977
7978   val = args[coding_arg_default_char];
7979   if (NILP (val))
7980     CODING_ATTR_DEFAULT_CHAR (attrs) = make_number (' ');
7981   else
7982     {
7983       CHECK_CHARACTER (val);
7984       CODING_ATTR_DEFAULT_CHAR (attrs) = val;
7985     }
7986
7987   val = args[coding_arg_plist];
7988   CHECK_LIST (val);
7989   CODING_ATTR_PLIST (attrs) = val;
7990
7991   if (EQ (coding_type, Qcharset))
7992     {
7993       Lisp_Object list;
7994       /* Generate a lisp vector of 256 elements.  Each element is nil,
7995          integer, or a list of charset IDs.
7996
7997          If Nth element is nil, the byte code N is invalid in this
7998          coding system.
7999
8000          If Nth element is a number NUM, N is the first byte of a
8001          charset whose ID is NUM.
8002
8003          If Nth element is a list of charset IDs, N is the first byte
8004          of one of them.  The list is sorted by dimensions of the
8005          charsets.  A charset of smaller dimension comes firtst.
8006       */
8007       for (list = Qnil, tail = charset_list; CONSP (tail); tail = XCDR (tail))
8008         {
8009           struct charset *charset = CHARSET_FROM_ID (XFASTINT (XCAR (tail)));
8010
8011           if (charset->method == CHARSET_METHOD_SUPERSET)
8012             {
8013               val = CHARSET_SUPERSET (charset);
8014               for (; CONSP (val); val = XCDR (val))
8015                 list = Fcons (XCAR (XCAR (val)), list);
8016             }
8017           else
8018             list = Fcons (XCAR (tail), list);
8019         }
8020
8021       val = Fmake_vector (make_number (256), Qnil);
8022
8023       for (tail = Fnreverse (list); CONSP (tail); tail = XCDR (tail))
8024         {
8025           struct charset *charset = CHARSET_FROM_ID (XFASTINT (XCAR (tail)));
8026           int dim = CHARSET_DIMENSION (charset);
8027           int idx = (dim - 1) * 4;
8028
8029           if (CHARSET_ASCII_COMPATIBLE_P (charset))
8030             CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
8031
8032           for (i = charset->code_space[idx];
8033                i <= charset->code_space[idx + 1]; i++)
8034             {
8035               Lisp_Object tmp, tmp2;
8036               int dim2;
8037
8038               tmp = AREF (val, i);
8039               if (NILP (tmp))
8040                 tmp = XCAR (tail);
8041               else if (NUMBERP (tmp))
8042                 {
8043                   dim2 = CHARSET_DIMENSION (CHARSET_FROM_ID (XFASTINT (tmp)));
8044                   if (dim < dim2)
8045                     tmp = Fcons (XCAR (tail), Fcons (tmp, Qnil));
8046                   else
8047                     tmp = Fcons (tmp, Fcons (XCAR (tail), Qnil));
8048                 }
8049               else
8050                 {
8051                   for (tmp2 = tmp; CONSP (tmp2); tmp2 = XCDR (tmp2))
8052                     {
8053                       dim2 = CHARSET_DIMENSION (CHARSET_FROM_ID (XFASTINT (XCAR (tmp2))));
8054                       if (dim < dim2)
8055                         break;
8056                     }
8057                   if (NILP (tmp2))
8058                     tmp = nconc2 (tmp, Fcons (XCAR (tail), Qnil));
8059                   else
8060                     {
8061                       XSETCDR (tmp2, Fcons (XCAR (tmp2), XCDR (tmp2)));
8062                       XSETCAR (tmp2, XCAR (tail));
8063                     }
8064                 }
8065               ASET (val, i, tmp);
8066             }
8067         }
8068       ASET (attrs, coding_attr_charset_valids, val);
8069       category = coding_category_charset;
8070     }
8071   else if (EQ (coding_type, Qccl))
8072     {
8073       Lisp_Object valids;
8074
8075       if (nargs < coding_arg_ccl_max)
8076         goto short_args;
8077
8078       val = args[coding_arg_ccl_decoder];
8079       CHECK_CCL_PROGRAM (val);
8080       if (VECTORP (val))
8081         val = Fcopy_sequence (val);
8082       ASET (attrs, coding_attr_ccl_decoder, val);
8083
8084       val = args[coding_arg_ccl_encoder];
8085       CHECK_CCL_PROGRAM (val);
8086       if (VECTORP (val))
8087         val = Fcopy_sequence (val);
8088       ASET (attrs, coding_attr_ccl_encoder, val);
8089
8090       val = args[coding_arg_ccl_valids];
8091       valids = Fmake_string (make_number (256), make_number (0));
8092       for (tail = val; !NILP (tail); tail = Fcdr (tail))
8093         {
8094           int from, to;
8095
8096           val = Fcar (tail);
8097           if (INTEGERP (val))
8098             {
8099               from = to = XINT (val);
8100               if (from < 0 || from > 255)
8101                 args_out_of_range_3 (val, make_number (0), make_number (255));
8102             }
8103           else
8104             {
8105               CHECK_CONS (val);
8106               CHECK_NUMBER (XCAR (val));
8107               CHECK_NUMBER (XCDR (val));
8108               from = XINT (XCAR (val));
8109               if (from < 0 || from > 255)
8110                 args_out_of_range_3 (XCAR (val),
8111                                      make_number (0), make_number (255));
8112               to = XINT (XCDR (val));
8113               if (to < from || to > 255)
8114                 args_out_of_range_3 (XCDR (val),
8115                                      XCAR (val), make_number (255));
8116             }
8117           for (i = from; i <= to; i++)
8118             XSTRING (valids)->data[i] = 1;
8119         }
8120       ASET (attrs, coding_attr_ccl_valids, valids);
8121
8122       category = coding_category_ccl;
8123     }
8124   else if (EQ (coding_type, Qutf_16))
8125     {
8126       Lisp_Object bom, endian;
8127
8128       CODING_ATTR_ASCII_COMPAT (attrs) = Qnil;
8129
8130       if (nargs < coding_arg_utf16_max)
8131         goto short_args;
8132
8133       bom = args[coding_arg_utf16_bom];
8134       if (! NILP (bom) && ! EQ (bom, Qt))
8135         {
8136           CHECK_CONS (bom);
8137           CHECK_CODING_SYSTEM (XCAR (bom));
8138           CHECK_CODING_SYSTEM (XCDR (bom));
8139         }
8140       ASET (attrs, coding_attr_utf_16_bom, bom);
8141
8142       endian = args[coding_arg_utf16_endian];
8143       CHECK_SYMBOL (endian);
8144       if (NILP (endian))
8145         endian = Qbig;
8146       else if (! EQ (endian, Qbig) && ! EQ (endian, Qlittle))
8147         error ("Invalid endian: %s", XSYMBOL (endian)->name->data);
8148       ASET (attrs, coding_attr_utf_16_endian, endian);
8149
8150       category = (CONSP (bom)
8151                   ? coding_category_utf_16_auto
8152                   : NILP (bom)
8153                   ? (EQ (endian, Qbig)
8154                      ? coding_category_utf_16_be_nosig
8155                      : coding_category_utf_16_le_nosig)
8156                   : (EQ (endian, Qbig)
8157                      ? coding_category_utf_16_be
8158                      : coding_category_utf_16_le));
8159     }
8160   else if (EQ (coding_type, Qiso_2022))
8161     {
8162       Lisp_Object initial, reg_usage, request, flags;
8163       int i;
8164
8165       if (nargs < coding_arg_iso2022_max)
8166         goto short_args;
8167
8168       initial = Fcopy_sequence (args[coding_arg_iso2022_initial]);
8169       CHECK_VECTOR (initial);
8170       for (i = 0; i < 4; i++)
8171         {
8172           val = Faref (initial, make_number (i));
8173           if (! NILP (val))
8174             {
8175               struct charset *charset;
8176
8177               CHECK_CHARSET_GET_CHARSET (val, charset);
8178               ASET (initial, i, make_number (CHARSET_ID (charset)));
8179               if (i == 0 && CHARSET_ASCII_COMPATIBLE_P (charset))
8180                 CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
8181             }
8182           else
8183             ASET (initial, i, make_number (-1));
8184         }
8185
8186       reg_usage = args[coding_arg_iso2022_reg_usage];
8187       CHECK_CONS (reg_usage);
8188       CHECK_NATNUM (XCAR (reg_usage));
8189       CHECK_NATNUM (XCDR (reg_usage));
8190
8191       request = Fcopy_sequence (args[coding_arg_iso2022_request]);
8192       for (tail = request; ! NILP (tail); tail = Fcdr (tail))
8193         {
8194           int id;
8195
8196           val = Fcar (tail);
8197           CHECK_CONS (val);
8198           CHECK_CHARSET_GET_ID (XCAR (val), id);
8199           CHECK_NATNUM (XCDR (val));
8200           if (XINT (XCDR (val)) >= 4)
8201             error ("Invalid graphic register number: %d", XINT (XCDR (val)));
8202           XCAR (val) = make_number (id);
8203         }
8204
8205       flags = args[coding_arg_iso2022_flags];
8206       CHECK_NATNUM (flags);
8207       i = XINT (flags);
8208       if (EQ (args[coding_arg_charset_list], Qiso_2022))
8209         flags = make_number (i | CODING_ISO_FLAG_FULL_SUPPORT);
8210
8211       ASET (attrs, coding_attr_iso_initial, initial);
8212       ASET (attrs, coding_attr_iso_usage, reg_usage);
8213       ASET (attrs, coding_attr_iso_request, request);
8214       ASET (attrs, coding_attr_iso_flags, flags);
8215       setup_iso_safe_charsets (attrs);
8216
8217       if (i & CODING_ISO_FLAG_SEVEN_BITS)
8218         category = ((i & (CODING_ISO_FLAG_LOCKING_SHIFT
8219                           | CODING_ISO_FLAG_SINGLE_SHIFT))
8220                     ? coding_category_iso_7_else
8221                     : EQ (args[coding_arg_charset_list], Qiso_2022)
8222                     ? coding_category_iso_7
8223                     : coding_category_iso_7_tight);
8224       else
8225         {
8226           int id = XINT (AREF (initial, 1));
8227
8228           category = (((i & CODING_ISO_FLAG_LOCKING_SHIFT)
8229                        || EQ (args[coding_arg_charset_list], Qiso_2022)
8230                        || id < 0)
8231                       ? coding_category_iso_8_else
8232                       : (CHARSET_DIMENSION (CHARSET_FROM_ID (id)) == 1)
8233                       ? coding_category_iso_8_1
8234                       : coding_category_iso_8_2);
8235         }
8236       if (category != coding_category_iso_8_1
8237           && category != coding_category_iso_8_2)
8238         CODING_ATTR_ASCII_COMPAT (attrs) = Qnil;
8239     }
8240   else if (EQ (coding_type, Qemacs_mule))
8241     {
8242       if (EQ (args[coding_arg_charset_list], Qemacs_mule))
8243         ASET (attrs, coding_attr_emacs_mule_full, Qt);
8244       CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
8245       category = coding_category_emacs_mule;
8246     }
8247   else if (EQ (coding_type, Qshift_jis))
8248     {
8249
8250       struct charset *charset;
8251
8252       if (XINT (Flength (charset_list)) != 3)
8253         error ("There should be just three charsets");
8254
8255       charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
8256       if (CHARSET_DIMENSION (charset) != 1)
8257         error ("Dimension of charset %s is not one",
8258                XSYMBOL (CHARSET_NAME (charset))->name->data);
8259       if (CHARSET_ASCII_COMPATIBLE_P (charset))
8260         CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
8261
8262       charset_list = XCDR (charset_list);
8263       charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
8264       if (CHARSET_DIMENSION (charset) != 1)
8265         error ("Dimension of charset %s is not one",
8266                XSYMBOL (CHARSET_NAME (charset))->name->data);
8267
8268       charset_list = XCDR (charset_list);
8269       charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
8270       if (CHARSET_DIMENSION (charset) != 2)
8271         error ("Dimension of charset %s is not two",
8272                XSYMBOL (CHARSET_NAME (charset))->name->data);
8273
8274       category = coding_category_sjis;
8275       Vsjis_coding_system = name;
8276     }
8277   else if (EQ (coding_type, Qbig5))
8278     {
8279       struct charset *charset;
8280
8281       if (XINT (Flength (charset_list)) != 2)
8282         error ("There should be just two charsets");
8283
8284       charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
8285       if (CHARSET_DIMENSION (charset) != 1)
8286         error ("Dimension of charset %s is not one",
8287                XSYMBOL (CHARSET_NAME (charset))->name->data);
8288       if (CHARSET_ASCII_COMPATIBLE_P (charset))
8289         CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
8290
8291       charset_list = XCDR (charset_list);
8292       charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
8293       if (CHARSET_DIMENSION (charset) != 2)
8294         error ("Dimension of charset %s is not two",
8295                XSYMBOL (CHARSET_NAME (charset))->name->data);
8296
8297       category = coding_category_big5;
8298       Vbig5_coding_system = name;
8299     }
8300   else if (EQ (coding_type, Qraw_text))
8301     {
8302       category = coding_category_raw_text;
8303       CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
8304     }
8305   else if (EQ (coding_type, Qutf_8))
8306     {
8307       category = coding_category_utf_8;
8308       CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
8309     }
8310   else if (EQ (coding_type, Qundecided))
8311     category = coding_category_undecided;
8312   else
8313     error ("Invalid coding system type: %s",
8314            XSYMBOL (coding_type)->name->data);
8315
8316   CODING_ATTR_CATEGORY (attrs) = make_number (category);
8317   CODING_ATTR_PLIST (attrs)
8318     = Fcons (QCcategory, Fcons (AREF (Vcoding_category_table, category),
8319                                 CODING_ATTR_PLIST (attrs)));
8320
8321   eol_type = args[coding_arg_eol_type];
8322   if (! NILP (eol_type)
8323       && ! EQ (eol_type, Qunix)
8324       && ! EQ (eol_type, Qdos)
8325       && ! EQ (eol_type, Qmac))
8326     error ("Invalid eol-type");
8327
8328   aliases = Fcons (name, Qnil);
8329
8330   if (NILP (eol_type))
8331     {
8332       eol_type = make_subsidiaries (name);
8333       for (i = 0; i < 3; i++)
8334         {
8335           Lisp_Object this_spec, this_name, this_aliases, this_eol_type;
8336
8337           this_name = AREF (eol_type, i);
8338           this_aliases = Fcons (this_name, Qnil);
8339           this_eol_type = (i == 0 ? Qunix : i == 1 ? Qdos : Qmac);
8340           this_spec = Fmake_vector (make_number (3), attrs);
8341           ASET (this_spec, 1, this_aliases);
8342           ASET (this_spec, 2, this_eol_type);
8343           Fputhash (this_name, this_spec, Vcoding_system_hash_table);
8344           Vcoding_system_list = Fcons (this_name, Vcoding_system_list);
8345           Vcoding_system_alist = Fcons (Fcons (Fsymbol_name (this_name), Qnil),
8346                                         Vcoding_system_alist);
8347         }
8348     }
8349
8350   spec_vec = Fmake_vector (make_number (3), attrs);
8351   ASET (spec_vec, 1, aliases);
8352   ASET (spec_vec, 2, eol_type);
8353
8354   Fputhash (name, spec_vec, Vcoding_system_hash_table);
8355   Vcoding_system_list = Fcons (name, Vcoding_system_list);
8356   Vcoding_system_alist = Fcons (Fcons (Fsymbol_name (name), Qnil),
8357                                 Vcoding_system_alist);
8358
8359   {
8360     int id = coding_categories[category].id;
8361
8362     if (id < 0 || EQ (name, CODING_ID_NAME (id)))
8363       setup_coding_system (name, &coding_categories[category]);
8364   }
8365
8366   return Qnil;
8367
8368  short_args:
8369   return Fsignal (Qwrong_number_of_arguments,
8370                   Fcons (intern ("define-coding-system-internal"),
8371                          make_number (nargs)));
8372 }
8373
8374 /* Fixme: should this record the alias relationships for
8375    diagnostics?  Should it update coding-system-list?  */
8376 DEFUN ("define-coding-system-alias", Fdefine_coding_system_alias,
8377        Sdefine_coding_system_alias, 2, 2, 0,
8378        doc: /* Define ALIAS as an alias for CODING-SYSTEM.  */)
8379      (alias, coding_system)
8380      Lisp_Object alias, coding_system;
8381 {
8382   Lisp_Object spec, aliases, eol_type;
8383
8384   CHECK_SYMBOL (alias);
8385   CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
8386   aliases = AREF (spec, 1);
8387   while (!NILP (XCDR (aliases)))
8388     aliases = XCDR (aliases);
8389   XCDR (aliases) = Fcons (alias, Qnil);
8390
8391   eol_type = AREF (spec, 2);
8392   if (VECTORP (eol_type))
8393     {
8394       Lisp_Object subsidiaries;
8395       int i;
8396
8397       subsidiaries = make_subsidiaries (alias);
8398       for (i = 0; i < 3; i++)
8399         Fdefine_coding_system_alias (AREF (subsidiaries, i),
8400                                      AREF (eol_type, i));
8401
8402       ASET (spec, 2, subsidiaries);
8403     }
8404
8405   Fputhash (alias, spec, Vcoding_system_hash_table);
8406   Vcoding_system_alist = Fcons (Fcons (Fsymbol_name (alias), Qnil),
8407                                 Vcoding_system_alist);
8408
8409   return Qnil;
8410 }
8411
8412 DEFUN ("coding-system-base", Fcoding_system_base, Scoding_system_base,
8413        1, 1, 0,
8414        doc: /* Return the base of CODING-SYSTEM.
8415 Any alias or subsidiary coding system is not a base coding system.  */)
8416   (coding_system)
8417      Lisp_Object coding_system;
8418 {
8419   Lisp_Object spec, attrs;
8420
8421   if (NILP (coding_system))
8422     return (Qno_conversion);
8423   CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
8424   attrs = AREF (spec, 0);
8425   return CODING_ATTR_BASE_NAME (attrs);
8426 }
8427
8428 DEFUN ("coding-system-plist", Fcoding_system_plist, Scoding_system_plist,
8429        1, 1, 0,
8430        doc: "Return the property list of CODING-SYSTEM.")
8431      (coding_system)
8432      Lisp_Object coding_system;
8433 {
8434   Lisp_Object spec, attrs;
8435
8436   if (NILP (coding_system))
8437     coding_system = Qno_conversion;
8438   CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
8439   attrs = AREF (spec, 0);
8440   return CODING_ATTR_PLIST (attrs);
8441 }
8442
8443
8444 DEFUN ("coding-system-aliases", Fcoding_system_aliases, Scoding_system_aliases,
8445        1, 1, 0,
8446        doc: /* Return the list of aliases of CODING-SYSTEM.  */)
8447      (coding_system)
8448      Lisp_Object coding_system;
8449 {
8450   Lisp_Object spec;
8451
8452   if (NILP (coding_system))
8453     coding_system = Qno_conversion;
8454   CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
8455   return AREF (spec, 1);
8456 }
8457
8458 DEFUN ("coding-system-eol-type", Fcoding_system_eol_type,
8459        Scoding_system_eol_type, 1, 1, 0,
8460        doc: /* Return eol-type of CODING-SYSTEM.
8461 An eol-type is integer 0, 1, 2, or a vector of coding systems.
8462
8463 Integer values 0, 1, and 2 indicate a format of end-of-line; LF, CRLF,
8464 and CR respectively.
8465
8466 A vector value indicates that a format of end-of-line should be
8467 detected automatically.  Nth element of the vector is the subsidiary
8468 coding system whose eol-type is N.  */)
8469      (coding_system)
8470      Lisp_Object coding_system;
8471 {
8472   Lisp_Object spec, eol_type;
8473   int n;
8474
8475   if (NILP (coding_system))
8476     coding_system = Qno_conversion;
8477   if (! CODING_SYSTEM_P (coding_system))
8478     return Qnil;
8479   spec = CODING_SYSTEM_SPEC (coding_system);
8480   eol_type = AREF (spec, 2);
8481   if (VECTORP (eol_type))
8482     return Fcopy_sequence (eol_type);
8483   n = EQ (eol_type, Qunix) ? 0 : EQ (eol_type, Qdos) ? 1 : 2;
8484   return make_number (n);
8485 }
8486
8487 #endif /* emacs */
8488
8489 \f
8490 /*** 9. Post-amble ***/
8491
8492 void
8493 init_coding_once ()
8494 {
8495   int i;
8496
8497   for (i = 0; i < coding_category_max; i++)
8498     {
8499       coding_categories[i].id = -1;
8500       coding_priorities[i] = i;
8501     }
8502
8503   /* ISO2022 specific initialize routine.  */
8504   for (i = 0; i < 0x20; i++)
8505     iso_code_class[i] = ISO_control_0;
8506   for (i = 0x21; i < 0x7F; i++)
8507     iso_code_class[i] = ISO_graphic_plane_0;
8508   for (i = 0x80; i < 0xA0; i++)
8509     iso_code_class[i] = ISO_control_1;
8510   for (i = 0xA1; i < 0xFF; i++)
8511     iso_code_class[i] = ISO_graphic_plane_1;
8512   iso_code_class[0x20] = iso_code_class[0x7F] = ISO_0x20_or_0x7F;
8513   iso_code_class[0xA0] = iso_code_class[0xFF] = ISO_0xA0_or_0xFF;
8514   iso_code_class[ISO_CODE_CR] = ISO_carriage_return;
8515   iso_code_class[ISO_CODE_SO] = ISO_shift_out;
8516   iso_code_class[ISO_CODE_SI] = ISO_shift_in;
8517   iso_code_class[ISO_CODE_SS2_7] = ISO_single_shift_2_7;
8518   iso_code_class[ISO_CODE_ESC] = ISO_escape;
8519   iso_code_class[ISO_CODE_SS2] = ISO_single_shift_2;
8520   iso_code_class[ISO_CODE_SS3] = ISO_single_shift_3;
8521   iso_code_class[ISO_CODE_CSI] = ISO_control_sequence_introducer;
8522
8523   inhibit_pre_post_conversion = 0;
8524
8525   for (i = 0; i < 256; i++)
8526     {
8527       emacs_mule_bytes[i] = 1;
8528     }
8529   emacs_mule_bytes[EMACS_MULE_LEADING_CODE_PRIVATE_11] = 3;
8530   emacs_mule_bytes[EMACS_MULE_LEADING_CODE_PRIVATE_12] = 3;
8531   emacs_mule_bytes[EMACS_MULE_LEADING_CODE_PRIVATE_21] = 4;
8532   emacs_mule_bytes[EMACS_MULE_LEADING_CODE_PRIVATE_22] = 4;
8533 }
8534
8535 #ifdef emacs
8536
8537 void
8538 syms_of_coding ()
8539 {
8540   staticpro (&Vcoding_system_hash_table);
8541   Vcoding_system_hash_table = Fmakehash (Qeq);
8542
8543   staticpro (&Vsjis_coding_system);
8544   Vsjis_coding_system = Qnil;
8545
8546   staticpro (&Vbig5_coding_system);
8547   Vbig5_coding_system = Qnil;
8548
8549   staticpro (&Vcode_conversion_work_buf_list);
8550   Vcode_conversion_work_buf_list = Qnil;
8551
8552   staticpro (&Vcode_conversion_reused_work_buf);
8553   Vcode_conversion_reused_work_buf = Qnil;
8554
8555   DEFSYM (Qcharset, "charset");
8556   DEFSYM (Qtarget_idx, "target-idx");
8557   DEFSYM (Qcoding_system_history, "coding-system-history");
8558   Fset (Qcoding_system_history, Qnil);
8559
8560   /* Target FILENAME is the first argument.  */
8561   Fput (Qinsert_file_contents, Qtarget_idx, make_number (0));
8562   /* Target FILENAME is the third argument.  */
8563   Fput (Qwrite_region, Qtarget_idx, make_number (2));
8564
8565   DEFSYM (Qcall_process, "call-process");
8566   /* Target PROGRAM is the first argument.  */
8567   Fput (Qcall_process, Qtarget_idx, make_number (0));
8568
8569   DEFSYM (Qcall_process_region, "call-process-region");
8570   /* Target PROGRAM is the third argument.  */
8571   Fput (Qcall_process_region, Qtarget_idx, make_number (2));
8572
8573   DEFSYM (Qstart_process, "start-process");
8574   /* Target PROGRAM is the third argument.  */
8575   Fput (Qstart_process, Qtarget_idx, make_number (2));
8576
8577   DEFSYM (Qopen_network_stream, "open-network-stream");
8578   /* Target SERVICE is the fourth argument.  */
8579   Fput (Qopen_network_stream, Qtarget_idx, make_number (3));
8580
8581   DEFSYM (Qcoding_system, "coding-system");
8582   DEFSYM (Qcoding_aliases, "coding-aliases");
8583
8584   DEFSYM (Qeol_type, "eol-type");
8585   DEFSYM (Qunix, "unix");
8586   DEFSYM (Qdos, "dos");
8587
8588   DEFSYM (Qbuffer_file_coding_system, "buffer-file-coding-system");
8589   DEFSYM (Qpost_read_conversion, "post-read-conversion");
8590   DEFSYM (Qpre_write_conversion, "pre-write-conversion");
8591   DEFSYM (Qdefault_char, "default-char");
8592   DEFSYM (Qundecided, "undecided");
8593   DEFSYM (Qno_conversion, "no-conversion");
8594   DEFSYM (Qraw_text, "raw-text");
8595
8596   DEFSYM (Qiso_2022, "iso-2022");
8597
8598   DEFSYM (Qutf_8, "utf-8");
8599
8600   DEFSYM (Qutf_16, "utf-16");
8601   DEFSYM (Qbig, "big");
8602   DEFSYM (Qlittle, "little");
8603
8604   DEFSYM (Qshift_jis, "shift-jis");
8605   DEFSYM (Qbig5, "big5");
8606
8607   DEFSYM (Qcoding_system_p, "coding-system-p");
8608
8609   DEFSYM (Qcoding_system_error, "coding-system-error");
8610   Fput (Qcoding_system_error, Qerror_conditions,
8611         Fcons (Qcoding_system_error, Fcons (Qerror, Qnil)));
8612   Fput (Qcoding_system_error, Qerror_message,
8613         build_string ("Invalid coding system"));
8614
8615   /* Intern this now in case it isn't already done.
8616      Setting this variable twice is harmless.
8617      But don't staticpro it here--that is done in alloc.c.  */
8618   Qchar_table_extra_slots = intern ("char-table-extra-slots");
8619
8620   DEFSYM (Qtranslation_table, "translation-table");
8621   Fput (Qtranslation_table, Qchar_table_extra_slots, make_number (1));
8622   DEFSYM (Qtranslation_table_id, "translation-table-id");
8623   DEFSYM (Qtranslation_table_for_decode, "translation-table-for-decode");
8624   DEFSYM (Qtranslation_table_for_encode, "translation-table-for-encode");
8625
8626   DEFSYM (Qvalid_codes, "valid-codes");
8627
8628   DEFSYM (Qemacs_mule, "emacs-mule");
8629
8630   DEFSYM (QCcategory, ":category");
8631
8632   Vcoding_category_table
8633     = Fmake_vector (make_number (coding_category_max), Qnil);
8634   staticpro (&Vcoding_category_table);
8635   /* Followings are target of code detection.  */
8636   ASET (Vcoding_category_table, coding_category_iso_7,
8637         intern ("coding-category-iso-7"));
8638   ASET (Vcoding_category_table, coding_category_iso_7_tight,
8639         intern ("coding-category-iso-7-tight"));
8640   ASET (Vcoding_category_table, coding_category_iso_8_1,
8641         intern ("coding-category-iso-8-1"));
8642   ASET (Vcoding_category_table, coding_category_iso_8_2,
8643         intern ("coding-category-iso-8-2"));
8644   ASET (Vcoding_category_table, coding_category_iso_7_else,
8645         intern ("coding-category-iso-7-else"));
8646   ASET (Vcoding_category_table, coding_category_iso_8_else,
8647         intern ("coding-category-iso-8-else"));
8648   ASET (Vcoding_category_table, coding_category_utf_8,
8649         intern ("coding-category-utf-8"));
8650   ASET (Vcoding_category_table, coding_category_utf_16_be,
8651         intern ("coding-category-utf-16-be"));
8652   ASET (Vcoding_category_table, coding_category_utf_16_auto,
8653         intern ("coding-category-utf-16-auto"));
8654   ASET (Vcoding_category_table, coding_category_utf_16_le,
8655         intern ("coding-category-utf-16-le"));
8656   ASET (Vcoding_category_table, coding_category_utf_16_be_nosig,
8657         intern ("coding-category-utf-16-be-nosig"));
8658   ASET (Vcoding_category_table, coding_category_utf_16_le_nosig,
8659         intern ("coding-category-utf-16-le-nosig"));
8660   ASET (Vcoding_category_table, coding_category_charset,
8661         intern ("coding-category-charset"));
8662   ASET (Vcoding_category_table, coding_category_sjis,
8663         intern ("coding-category-sjis"));
8664   ASET (Vcoding_category_table, coding_category_big5,
8665         intern ("coding-category-big5"));
8666   ASET (Vcoding_category_table, coding_category_ccl,
8667         intern ("coding-category-ccl"));
8668   ASET (Vcoding_category_table, coding_category_emacs_mule,
8669         intern ("coding-category-emacs-mule"));
8670   /* Followings are NOT target of code detection.  */
8671   ASET (Vcoding_category_table, coding_category_raw_text,
8672         intern ("coding-category-raw-text"));
8673   ASET (Vcoding_category_table, coding_category_undecided,
8674         intern ("coding-category-undecided"));
8675
8676   defsubr (&Scoding_system_p);
8677   defsubr (&Sread_coding_system);
8678   defsubr (&Sread_non_nil_coding_system);
8679   defsubr (&Scheck_coding_system);
8680   defsubr (&Sdetect_coding_region);
8681   defsubr (&Sdetect_coding_string);
8682   defsubr (&Sfind_coding_systems_region_internal);
8683   defsubr (&Scheck_coding_systems_region);
8684   defsubr (&Sdecode_coding_region);
8685   defsubr (&Sencode_coding_region);
8686   defsubr (&Sdecode_coding_string);
8687   defsubr (&Sencode_coding_string);
8688   defsubr (&Sdecode_sjis_char);
8689   defsubr (&Sencode_sjis_char);
8690   defsubr (&Sdecode_big5_char);
8691   defsubr (&Sencode_big5_char);
8692   defsubr (&Sset_terminal_coding_system_internal);
8693   defsubr (&Sset_safe_terminal_coding_system_internal);
8694   defsubr (&Sterminal_coding_system);
8695   defsubr (&Sset_keyboard_coding_system_internal);
8696   defsubr (&Skeyboard_coding_system);
8697   defsubr (&Sfind_operation_coding_system);
8698   defsubr (&Sset_coding_system_priority);
8699   defsubr (&Sdefine_coding_system_internal);
8700   defsubr (&Sdefine_coding_system_alias);
8701   defsubr (&Scoding_system_base);
8702   defsubr (&Scoding_system_plist);
8703   defsubr (&Scoding_system_aliases);
8704   defsubr (&Scoding_system_eol_type);
8705   defsubr (&Scoding_system_priority_list);
8706
8707   DEFVAR_LISP ("coding-system-list", &Vcoding_system_list,
8708                doc: /* List of coding systems.
8709
8710 Do not alter the value of this variable manually.  This variable should be
8711 updated by the functions `define-coding-system' and
8712 `define-coding-system-alias'.  */);
8713   Vcoding_system_list = Qnil;
8714
8715   DEFVAR_LISP ("coding-system-alist", &Vcoding_system_alist,
8716                doc: /* Alist of coding system names.
8717 Each element is one element list of coding system name.
8718 This variable is given to `completing-read' as TABLE argument.
8719
8720 Do not alter the value of this variable manually.  This variable should be
8721 updated by the functions `make-coding-system' and
8722 `define-coding-system-alias'.  */);
8723   Vcoding_system_alist = Qnil;
8724
8725   DEFVAR_LISP ("coding-category-list", &Vcoding_category_list,
8726                doc: /* List of coding-categories (symbols) ordered by priority.
8727
8728 On detecting a coding system, Emacs tries code detection algorithms
8729 associated with each coding-category one by one in this order.  When
8730 one algorithm agrees with a byte sequence of source text, the coding
8731 system bound to the corresponding coding-category is selected.  */);
8732   {
8733     int i;
8734
8735     Vcoding_category_list = Qnil;
8736     for (i = coding_category_max - 1; i >= 0; i--)
8737       Vcoding_category_list
8738         = Fcons (XVECTOR (Vcoding_category_table)->contents[i],
8739                  Vcoding_category_list);
8740   }
8741
8742   DEFVAR_LISP ("coding-system-for-read", &Vcoding_system_for_read,
8743                doc: /* Specify the coding system for read operations.
8744 It is useful to bind this variable with `let', but do not set it globally.
8745 If the value is a coding system, it is used for decoding on read operation.
8746 If not, an appropriate element is used from one of the coding system alists:
8747 There are three such tables, `file-coding-system-alist',
8748 `process-coding-system-alist', and `network-coding-system-alist'.  */);
8749   Vcoding_system_for_read = Qnil;
8750
8751   DEFVAR_LISP ("coding-system-for-write", &Vcoding_system_for_write,
8752                doc: /* Specify the coding system for write operations.
8753 Programs bind this variable with `let', but you should not set it globally.
8754 If the value is a coding system, it is used for encoding of output,
8755 when writing it to a file and when sending it to a file or subprocess.
8756
8757 If this does not specify a coding system, an appropriate element
8758 is used from one of the coding system alists:
8759 There are three such tables, `file-coding-system-alist',
8760 `process-coding-system-alist', and `network-coding-system-alist'.
8761 For output to files, if the above procedure does not specify a coding system,
8762 the value of `buffer-file-coding-system' is used.  */);
8763   Vcoding_system_for_write = Qnil;
8764
8765   DEFVAR_LISP ("last-coding-system-used", &Vlast_coding_system_used,
8766                doc: /*
8767 Coding system used in the latest file or process I/O.  */);
8768   Vlast_coding_system_used = Qnil;
8769
8770   DEFVAR_BOOL ("inhibit-eol-conversion", &inhibit_eol_conversion,
8771                doc: /*
8772 *Non-nil means always inhibit code conversion of end-of-line format.
8773 See info node `Coding Systems' and info node `Text and Binary' concerning
8774 such conversion.  */);
8775   inhibit_eol_conversion = 0;
8776
8777   DEFVAR_BOOL ("inherit-process-coding-system", &inherit_process_coding_system,
8778                doc: /*
8779 Non-nil means process buffer inherits coding system of process output.
8780 Bind it to t if the process output is to be treated as if it were a file
8781 read from some filesystem.  */);
8782   inherit_process_coding_system = 0;
8783
8784   DEFVAR_LISP ("file-coding-system-alist", &Vfile_coding_system_alist,
8785                doc: /*
8786 Alist to decide a coding system to use for a file I/O operation.
8787 The format is ((PATTERN . VAL) ...),
8788 where PATTERN is a regular expression matching a file name,
8789 VAL is a coding system, a cons of coding systems, or a function symbol.
8790 If VAL is a coding system, it is used for both decoding and encoding
8791 the file contents.
8792 If VAL is a cons of coding systems, the car part is used for decoding,
8793 and the cdr part is used for encoding.
8794 If VAL is a function symbol, the function must return a coding system
8795 or a cons of coding systems which are used as above.  The function gets
8796 the arguments with which `find-operation-coding-systems' was called.
8797
8798 See also the function `find-operation-coding-system'
8799 and the variable `auto-coding-alist'.  */);
8800   Vfile_coding_system_alist = Qnil;
8801
8802   DEFVAR_LISP ("process-coding-system-alist", &Vprocess_coding_system_alist,
8803                doc: /*
8804 Alist to decide a coding system to use for a process I/O operation.
8805 The format is ((PATTERN . VAL) ...),
8806 where PATTERN is a regular expression matching a program name,
8807 VAL is a coding system, a cons of coding systems, or a function symbol.
8808 If VAL is a coding system, it is used for both decoding what received
8809 from the program and encoding what sent to the program.
8810 If VAL is a cons of coding systems, the car part is used for decoding,
8811 and the cdr part is used for encoding.
8812 If VAL is a function symbol, the function must return a coding system
8813 or a cons of coding systems which are used as above.
8814
8815 See also the function `find-operation-coding-system'.  */);
8816   Vprocess_coding_system_alist = Qnil;
8817
8818   DEFVAR_LISP ("network-coding-system-alist", &Vnetwork_coding_system_alist,
8819                doc: /*
8820 Alist to decide a coding system to use for a network I/O operation.
8821 The format is ((PATTERN . VAL) ...),
8822 where PATTERN is a regular expression matching a network service name
8823 or is a port number to connect to,
8824 VAL is a coding system, a cons of coding systems, or a function symbol.
8825 If VAL is a coding system, it is used for both decoding what received
8826 from the network stream and encoding what sent to the network stream.
8827 If VAL is a cons of coding systems, the car part is used for decoding,
8828 and the cdr part is used for encoding.
8829 If VAL is a function symbol, the function must return a coding system
8830 or a cons of coding systems which are used as above.
8831
8832 See also the function `find-operation-coding-system'.  */);
8833   Vnetwork_coding_system_alist = Qnil;
8834
8835   DEFVAR_LISP ("locale-coding-system", &Vlocale_coding_system,
8836                doc: /* Coding system to use with system messages.
8837 Also used for decoding keyboard input on X Window system.  */);
8838   Vlocale_coding_system = Qnil;
8839
8840   /* The eol mnemonics are reset in startup.el system-dependently.  */
8841   DEFVAR_LISP ("eol-mnemonic-unix", &eol_mnemonic_unix,
8842                doc: /*
8843 *String displayed in mode line for UNIX-like (LF) end-of-line format.  */);
8844   eol_mnemonic_unix = build_string (":");
8845
8846   DEFVAR_LISP ("eol-mnemonic-dos", &eol_mnemonic_dos,
8847                doc: /*
8848 *String displayed in mode line for DOS-like (CRLF) end-of-line format.  */);
8849   eol_mnemonic_dos = build_string ("\\");
8850
8851   DEFVAR_LISP ("eol-mnemonic-mac", &eol_mnemonic_mac,
8852                doc: /*
8853 *String displayed in mode line for MAC-like (CR) end-of-line format.  */);
8854   eol_mnemonic_mac = build_string ("/");
8855
8856   DEFVAR_LISP ("eol-mnemonic-undecided", &eol_mnemonic_undecided,
8857                doc: /*
8858 *String displayed in mode line when end-of-line format is not yet determined.  */);
8859   eol_mnemonic_undecided = build_string (":");
8860
8861   DEFVAR_LISP ("enable-character-translation", &Venable_character_translation,
8862                doc: /*
8863 *Non-nil enables character translation while encoding and decoding.  */);
8864   Venable_character_translation = Qt;
8865
8866   DEFVAR_LISP ("standard-translation-table-for-decode",
8867                &Vstandard_translation_table_for_decode,
8868                doc: /* Table for translating characters while decoding.  */);
8869   Vstandard_translation_table_for_decode = Qnil;
8870
8871   DEFVAR_LISP ("standard-translation-table-for-encode",
8872                &Vstandard_translation_table_for_encode,
8873                doc: /* Table for translating characters while encoding.  */);
8874   Vstandard_translation_table_for_encode = Qnil;
8875
8876   DEFVAR_LISP ("charset-revision-table", &Vcharset_revision_table,
8877                doc: /* Alist of charsets vs revision numbers.
8878 While encoding, if a charset (car part of an element) is found,
8879 designate it with the escape sequence identifying revision (cdr part
8880 of the element).  */);
8881   Vcharset_revision_table = Qnil;
8882
8883   DEFVAR_LISP ("default-process-coding-system",
8884                &Vdefault_process_coding_system,
8885                doc: /* Cons of coding systems used for process I/O by default.
8886 The car part is used for decoding a process output,
8887 the cdr part is used for encoding a text to be sent to a process.  */);
8888   Vdefault_process_coding_system = Qnil;
8889
8890   DEFVAR_LISP ("latin-extra-code-table", &Vlatin_extra_code_table,
8891                doc: /*
8892 Table of extra Latin codes in the range 128..159 (inclusive).
8893 This is a vector of length 256.
8894 If Nth element is non-nil, the existence of code N in a file
8895 \(or output of subprocess) doesn't prevent it to be detected as
8896 a coding system of ISO 2022 variant which has a flag
8897 `accept-latin-extra-code' t (e.g. iso-latin-1) on reading a file
8898 or reading output of a subprocess.
8899 Only 128th through 159th elements has a meaning.  */);
8900   Vlatin_extra_code_table = Fmake_vector (make_number (256), Qnil);
8901
8902   DEFVAR_LISP ("select-safe-coding-system-function",
8903                &Vselect_safe_coding_system_function,
8904                doc: /*
8905 Function to call to select safe coding system for encoding a text.
8906
8907 If set, this function is called to force a user to select a proper
8908 coding system which can encode the text in the case that a default
8909 coding system used in each operation can't encode the text.
8910
8911 The default value is `select-safe-coding-system' (which see).  */);
8912   Vselect_safe_coding_system_function = Qnil;
8913
8914   DEFVAR_BOOL ("inhibit-iso-escape-detection",
8915                &inhibit_iso_escape_detection,
8916                doc: /*
8917 If non-nil, Emacs ignores ISO2022's escape sequence on code detection.
8918
8919 By default, on reading a file, Emacs tries to detect how the text is
8920 encoded.  This code detection is sensitive to escape sequences.  If
8921 the sequence is valid as ISO2022, the code is determined as one of
8922 the ISO2022 encodings, and the file is decoded by the corresponding
8923 coding system (e.g. `iso-2022-7bit').
8924
8925 However, there may be a case that you want to read escape sequences in
8926 a file as is.  In such a case, you can set this variable to non-nil.
8927 Then, as the code detection ignores any escape sequences, no file is
8928 detected as encoded in some ISO2022 encoding.  The result is that all
8929 escape sequences become visible in a buffer.
8930
8931 The default value is nil, and it is strongly recommended not to change
8932 it.  That is because many Emacs Lisp source files that contain
8933 non-ASCII characters are encoded by the coding system `iso-2022-7bit'
8934 in Emacs's distribution, and they won't be decoded correctly on
8935 reading if you suppress escape sequence detection.
8936
8937 The other way to read escape sequences in a file without decoding is
8938 to explicitly specify some coding system that doesn't use ISO2022's
8939 escape sequence (e.g `latin-1') on reading by \\[universal-coding-system-argument].  */);
8940   inhibit_iso_escape_detection = 0;
8941
8942   {
8943     Lisp_Object args[coding_arg_max];
8944     Lisp_Object plist[14];
8945     int i;
8946
8947     for (i = 0; i < coding_arg_max; i++)
8948       args[i] = Qnil;
8949
8950     plist[0] = intern (":name");
8951     plist[1] = args[coding_arg_name] = Qno_conversion;
8952     plist[2] = intern (":mnemonic");
8953     plist[3] = args[coding_arg_mnemonic] = make_number ('=');
8954     plist[4] = intern (":coding-type");
8955     plist[5] = args[coding_arg_coding_type] = Qraw_text;
8956     plist[6] = intern (":ascii-compatible-p");
8957     plist[7] = args[coding_arg_ascii_compatible_p] = Qt;
8958     plist[8] = intern (":default-char");
8959     plist[9] = args[coding_arg_default_char] = make_number (0);
8960     plist[10] = intern (":docstring");
8961     plist[11] = build_string ("Do no conversion.\n\
8962 \n\
8963 When you visit a file with this coding, the file is read into a\n\
8964 unibyte buffer as is, thus each byte of a file is treated as a\n\
8965 character.");
8966     plist[12] = intern (":eol-type");
8967     plist[13] = args[coding_arg_eol_type] = Qunix;
8968     args[coding_arg_plist] = Flist (14, plist);
8969     Fdefine_coding_system_internal (coding_arg_max, args);
8970   }
8971
8972   setup_coding_system (Qno_conversion, &keyboard_coding);
8973   setup_coding_system (Qno_conversion, &terminal_coding);
8974   setup_coding_system (Qno_conversion, &safe_terminal_coding);
8975
8976   {
8977     int i;
8978
8979     for (i = 0; i < coding_category_max; i++)
8980       Fset (AREF (Vcoding_category_table, i), Qno_conversion);
8981   }
8982 }
8983
8984 char *
8985 emacs_strerror (error_number)
8986      int error_number;
8987 {
8988   char *str;
8989
8990   synchronize_system_messages_locale ();
8991   str = strerror (error_number);
8992
8993   if (! NILP (Vlocale_coding_system))
8994     {
8995       Lisp_Object dec = code_convert_string_norecord (build_string (str),
8996                                                       Vlocale_coding_system,
8997                                                       0);
8998       str = (char *) XSTRING (dec)->data;
8999     }
9000
9001   return str;
9002 }
9003
9004 #endif /* emacs */