src/coding.c

   1 /* Coding system handler (conversion, detection, etc).
   2    Copyright (C) 1995, 1997, 1998 Electrotechnical Laboratory, JAPAN.
   3    Licensed to the Free Software Foundation.
   4    Copyright (C) 2001, 2002 Free Software Foundation, Inc.
   5    Copyright (C) 2001, 2002
   6      National Institute of Advanced Industrial Science and Technology (AIST)
   7      Registration Number H13PRO009
   8
   9 This file is part of GNU Emacs.
  10
  11 GNU Emacs is free software; you can redistribute it and/or modify
  12 it under the terms of the GNU General Public License as published by
  13 the Free Software Foundation; either version 2, or (at your option)
  14 any later version.
  15
  16 GNU Emacs is distributed in the hope that it will be useful,
  17 but WITHOUT ANY WARRANTY; without even the implied warranty of
  18 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  19 GNU General Public License for more details.
  20
  21 You should have received a copy of the GNU General Public License
  22 along with GNU Emacs; see the file COPYING.  If not, write to
  23 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
  24 Boston, MA 02111-1307, USA.  */
  25
  26 /*** TABLE OF CONTENTS ***
  27
  28   0. General comments
  29   1. Preamble
  30   2. Emacs' internal format (emacs-utf-8) handlers
  31   3. UTF-8 handlers
  32   4. UTF-16 handlers
  33   5. Charset-base coding systems handlers
  34   6. emacs-mule (old Emacs' internal format) handlers
  35   7. ISO2022 handlers
  36   8. Shift-JIS and BIG5 handlers
  37   9. CCL handlers
  38   10. C library functions
  39   11. Emacs Lisp library functions
  40   12. Postamble
  41
  42 */
  43
  44 /*** 0. General comments ***
  45
  46
  47 CODING SYSTEM
  48
  49   A coding system is an object for an encoding mechanism that contains
  50   information about how to convert byte sequences to character
  51   sequences and vice versa.  When we say "decode", it means converting
  52   a byte sequence of a specific coding system into a character
  53   sequence that is represented by Emacs' internal coding system
  54   `emacs-utf-8', and when we say "encode", it means converting a
  55   character sequence of emacs-utf-8 to a byte sequence of a specific
  56   coding system.
  57
  58   In Emacs Lisp, a coding system is represented by a Lisp symbol.  In
  59   C level, a coding system is represented by a vector of attributes
  60   stored in the hash table Vcharset_hash_table.  The conversion from
  61   coding system symbol to attributes vector is done by looking up
  62   Vcharset_hash_table by the symbol.
  63
  64   Coding systems are classified into the following types depending on
  65   the encoding mechanism.  Here's a brief description of the types.
  66
  67   o UTF-8
  68
  69   o UTF-16
  70
  71   o Charset-base coding system
  72
  73   A coding system defined by one or more (coded) character sets.
  74   Decoding and encoding are done by a code converter defined for each
  75   character set.
  76
  77   o Old Emacs internal format (emacs-mule)
  78
  79   The coding system adopted by old versions of Emacs (20 and 21).
  80
  81   o ISO2022-base coding system
  82
  83   The most famous coding system for multiple character sets.  X's
  84   Compound Text, various EUCs (Extended Unix Code), and coding systems
  85   used in the Internet communication such as ISO-2022-JP are all
  86   variants of ISO2022.
  87
  88   o SJIS (or Shift-JIS or MS-Kanji-Code)
  89
  90   A coding system to encode character sets: ASCII, JISX0201, and
  91   JISX0208.  Widely used for PC's in Japan.  Details are described in
  92   section 8.
  93
  94   o BIG5
  95
  96   A coding system to encode character sets: ASCII and Big5.  Widely
  97   used for Chinese (mainly in Taiwan and Hong Kong).  Details are
  98   described in section 8.  In this file, when we write "big5" (all
  99   lowercase), we mean the coding system, and when we write "Big5"
 100   (capitalized), we mean the character set.
 101
 102   o CCL
 103
 104   If a user wants to decode/encode text encoded in a coding system
 105   not listed above, he can supply a decoder and an encoder for it in
 106   CCL (Code Conversion Language) programs.  Emacs executes the CCL
 107   program while decoding/encoding.
 108
 109   o Raw-text
 110
 111   A coding system for text containing raw eight-bit data.  Emacs
 112   treats each byte of source text as a character (except for
 113   end-of-line conversion).
 114
 115   o No-conversion
 116
 117   Like raw text, but don't do end-of-line conversion.
 118
 119
 120 END-OF-LINE FORMAT
 121
 122   How text end-of-line is encoded depends on operating system.  For
 123   instance, Unix's format is just one byte of LF (line-feed) code,
 124   whereas DOS's format is two-byte sequence of `carriage-return' and
 125   `line-feed' codes.  MacOS's format is usually one byte of
 126   `carriage-return'.
 127
 128   Since text character encoding and end-of-line encoding are
 129   independent, any coding system described above can take any format
 130   of end-of-line (except for no-conversion).
 131
 132 STRUCT CODING_SYSTEM
 133
 134   Before using a coding system for code conversion (i.e. decoding and
 135   encoding), we setup a structure of type `struct coding_system'.
 136   This structure keeps various information about a specific code
 137   conversion (e.g. the location of source and destination data).
 138
 139 */
 140
 141 /* COMMON MACROS */
 142
 143
 144 /*** GENERAL NOTES on `detect_coding_XXX ()' functions ***
 145
 146   These functions check if a byte sequence specified as a source in
 147   CODING conforms to the format of XXX, and update the members of
 148   DETECT_INFO.
 149
 150   Return 1 if the byte sequence conforms to XXX, otherwise return 0.
 151
 152   Below is the template of these functions.  */
 153
 154 #if 0
 155 static int
 156 detect_coding_XXX (coding, detect_info)
 157      struct coding_system *coding;
 158      struct coding_detection_info *detect_info;
 159 {
 160   unsigned char *src = coding->source;
 161   unsigned char *src_end = coding->source + coding->src_bytes;
 162   int multibytep = coding->src_multibyte;
 163   int consumed_chars = 0;
 164   int found = 0;
 165   ...;
 166
 167   while (1)
 168     {
 169       /* Get one byte from the source.  If the souce is exausted, jump
 170          to no_more_source:.  */
 171       ONE_MORE_BYTE (c);
 172
 173       if (! __C_conforms_to_XXX___ (c))
 174         break;
 175       if (! __C_strongly_suggests_XXX__ (c))
 176         found = CATEGORY_MASK_XXX;
 177     }
 178   /* The byte sequence is invalid for XXX.  */
 179   detect_info->rejected |= CATEGORY_MASK_XXX;
 180   return 0;
 181
 182  no_more_source:
 183   /* The source exausted successfully.  */
 184   detect_info->found |= found;
 185   return 1;
 186 }
 187 #endif
 188
 189 /*** GENERAL NOTES on `decode_coding_XXX ()' functions ***
 190
 191   These functions decode a byte sequence specified as a source by
 192   CODING.  The resulting multibyte text goes to a place pointed to by
 193   CODING->charbuf, the length of which should not exceed
 194   CODING->charbuf_size;
 195
 196   These functions set the information of original and decoded texts in
 197   CODING->consumed, CODING->consumed_char, and CODING->charbuf_used.
 198   They also set CODING->result to one of CODING_RESULT_XXX indicating
 199   how the decoding is finished.
 200
 201   Below is the template of these functions.  */
 202
 203 #if 0
 204 static void
 205 decode_coding_XXXX (coding)
 206      struct coding_system *coding;
 207 {
 208   unsigned char *src = coding->source + coding->consumed;
 209   unsigned char *src_end = coding->source + coding->src_bytes;
 210   /* SRC_BASE remembers the start position in source in each loop.
 211      The loop will be exited when there's not enough source code, or
 212      when there's no room in CHARBUF for a decoded character.  */
 213   unsigned char *src_base;
 214   /* A buffer to produce decoded characters.  */
 215   int *charbuf = coding->charbuf;
 216   int *charbuf_end = charbuf + coding->charbuf_size;
 217   int multibytep = coding->src_multibyte;
 218
 219   while (1)
 220     {
 221       src_base = src;
 222       if (charbuf < charbuf_end)
 223         /* No more room to produce a decoded character.  */
 224         break;
 225       ONE_MORE_BYTE (c);
 226       /* Decode it. */
 227     }
 228
 229  no_more_source:
 230   if (src_base < src_end
 231       && coding->mode & CODING_MODE_LAST_BLOCK)
 232     /* If the source ends by partial bytes to construct a character,
 233        treat them as eight-bit raw data.  */
 234     while (src_base < src_end && charbuf < charbuf_end)
 235       *charbuf++ = *src_base++;
 236   /* Remember how many bytes and characters we consumed.  If the
 237      source is multibyte, the bytes and chars are not identical.  */
 238   coding->consumed = coding->consumed_char = src_base - coding->source;
 239   /* Remember how many characters we produced.  */
 240   coding->charbuf_used = charbuf - coding->charbuf;
 241 }
 242 #endif
 243
 244 /*** GENERAL NOTES on `encode_coding_XXX ()' functions ***
 245
 246   These functions encode SRC_BYTES length text at SOURCE of Emacs'
 247   internal multibyte format by CODING.  The resulting byte sequence
 248   goes to a place pointed to by DESTINATION, the length of which
 249   should not exceed DST_BYTES.
 250
 251   These functions set the information of original and encoded texts in
 252   the members produced, produced_char, consumed, and consumed_char of
 253   the structure *CODING.  They also set the member result to one of
 254   CODING_RESULT_XXX indicating how the encoding finished.
 255
 256   DST_BYTES zero means that source area and destination area are
 257   overlapped, which means that we can produce a encoded text until it
 258   reaches at the head of not-yet-encoded source text.
 259
 260   Below is a template of these functions.  */
 261 #if 0
 262 static void
 263 encode_coding_XXX (coding)
 264      struct coding_system *coding;
 265 {
 266   int multibytep = coding->dst_multibyte;
 267   int *charbuf = coding->charbuf;
 268   int *charbuf_end = charbuf->charbuf + coding->charbuf_used;
 269   unsigned char *dst = coding->destination + coding->produced;
 270   unsigned char *dst_end = coding->destination + coding->dst_bytes;
 271   unsigned char *adjusted_dst_end = dst_end - _MAX_BYTES_PRODUCED_IN_LOOP_;
 272   int produced_chars = 0;
 273
 274   for (; charbuf < charbuf_end && dst < adjusted_dst_end; charbuf++)
 275     {
 276       int c = *charbuf;
 277       /* Encode C into DST, and increment DST.  */
 278     }
 279  label_no_more_destination:
 280   /* How many chars and bytes we produced.  */
 281   coding->produced_char += produced_chars;
 282   coding->produced = dst - coding->destination;
 283 }
 284 #endif
 285
 286 \f
 287 /*** 1. Preamble ***/
 288
 289 #include <config.h>
 290 #include <stdio.h>
 291
 292 #include "lisp.h"
 293 #include "buffer.h"
 294 #include "character.h"
 295 #include "charset.h"
 296 #include "ccl.h"
 297 #include "composite.h"
 298 #include "coding.h"
 299 #include "window.h"
 300
 301 Lisp_Object Vcoding_system_hash_table;
 302
 303 Lisp_Object Qcoding_system, Qcoding_aliases, Qeol_type;
 304 Lisp_Object Qunix, Qdos;
 305 extern Lisp_Object Qmac;        /* frame.c */
 306 Lisp_Object Qbuffer_file_coding_system;
 307 Lisp_Object Qpost_read_conversion, Qpre_write_conversion;
 308 Lisp_Object Qdefault_char;
 309 Lisp_Object Qno_conversion, Qundecided;
 310 Lisp_Object Qcharset, Qiso_2022, Qutf_8, Qutf_16, Qshift_jis, Qbig5;
 311 Lisp_Object Qsignature, Qendian, Qbig, Qlittle;
 312 Lisp_Object Qcoding_system_history;
 313 Lisp_Object Qvalid_codes;
 314
 315 extern Lisp_Object Qinsert_file_contents, Qwrite_region;
 316 Lisp_Object Qcall_process, Qcall_process_region, Qprocess_argument;
 317 Lisp_Object Qstart_process, Qopen_network_stream;
 318 Lisp_Object Qtarget_idx;
 319
 320 Lisp_Object Vselect_safe_coding_system_function;
 321
 322 /* Mnemonic string for each format of end-of-line.  */
 323 Lisp_Object eol_mnemonic_unix, eol_mnemonic_dos, eol_mnemonic_mac;
 324 /* Mnemonic string to indicate format of end-of-line is not yet
 325    decided.  */
 326 Lisp_Object eol_mnemonic_undecided;
 327
 328 #ifdef emacs
 329
 330 Lisp_Object Vcoding_system_list, Vcoding_system_alist;
 331
 332 Lisp_Object Qcoding_system_p, Qcoding_system_error;
 333
 334 /* Coding system emacs-mule and raw-text are for converting only
 335    end-of-line format.  */
 336 Lisp_Object Qemacs_mule, Qraw_text;
 337
 338 /* Coding-systems are handed between Emacs Lisp programs and C internal
 339    routines by the following three variables.  */
 340 /* Coding-system for reading files and receiving data from process.  */
 341 Lisp_Object Vcoding_system_for_read;
 342 /* Coding-system for writing files and sending data to process.  */
 343 Lisp_Object Vcoding_system_for_write;
 344 /* Coding-system actually used in the latest I/O.  */
 345 Lisp_Object Vlast_coding_system_used;
 346
 347 /* A vector of length 256 which contains information about special
 348    Latin codes (especially for dealing with Microsoft codes).  */
 349 Lisp_Object Vlatin_extra_code_table;
 350
 351 /* Flag to inhibit code conversion of end-of-line format.  */
 352 int inhibit_eol_conversion;
 353
 354 /* Flag to inhibit ISO2022 escape sequence detection.  */
 355 int inhibit_iso_escape_detection;
 356
 357 /* Flag to make buffer-file-coding-system inherit from process-coding.  */
 358 int inherit_process_coding_system;
 359
 360 /* Coding system to be used to encode text for terminal display.  */
 361 struct coding_system terminal_coding;
 362
 363 /* Coding system to be used to encode text for terminal display when
 364    terminal coding system is nil.  */
 365 struct coding_system safe_terminal_coding;
 366
 367 /* Coding system of what is sent from terminal keyboard.  */
 368 struct coding_system keyboard_coding;
 369
 370 Lisp_Object Vfile_coding_system_alist;
 371 Lisp_Object Vprocess_coding_system_alist;
 372 Lisp_Object Vnetwork_coding_system_alist;
 373
 374 Lisp_Object Vlocale_coding_system;
 375
 376 #endif /* emacs */
 377
 378 /* Flag to tell if we look up translation table on character code
 379    conversion.  */
 380 Lisp_Object Venable_character_translation;
 381 /* Standard translation table to look up on decoding (reading).  */
 382 Lisp_Object Vstandard_translation_table_for_decode;
 383 /* Standard translation table to look up on encoding (writing).  */
 384 Lisp_Object Vstandard_translation_table_for_encode;
 385
 386 Lisp_Object Qtranslation_table;
 387 Lisp_Object Qtranslation_table_id;
 388 Lisp_Object Qtranslation_table_for_decode;
 389 Lisp_Object Qtranslation_table_for_encode;
 390
 391 /* Alist of charsets vs revision number.  */
 392 static Lisp_Object Vcharset_revision_table;
 393
 394 /* Default coding systems used for process I/O.  */
 395 Lisp_Object Vdefault_process_coding_system;
 396
 397 /* Global flag to tell that we can't call post-read-conversion and
 398    pre-write-conversion functions.  Usually the value is zero, but it
 399    is set to 1 temporarily while such functions are running.  This is
 400    to avoid infinite recursive call.  */
 401 static int inhibit_pre_post_conversion;
 402
 403 /* Two special coding systems.  */
 404 Lisp_Object Vsjis_coding_system;
 405 Lisp_Object Vbig5_coding_system;
 406
 407
 408 static int detect_coding_utf_8 P_ ((struct coding_system *,
 409                                     struct coding_detection_info *info));
 410 static void decode_coding_utf_8 P_ ((struct coding_system *));
 411 static int encode_coding_utf_8 P_ ((struct coding_system *));
 412
 413 static int detect_coding_utf_16 P_ ((struct coding_system *,
 414                                      struct coding_detection_info *info));
 415 static void decode_coding_utf_16 P_ ((struct coding_system *));
 416 static int encode_coding_utf_16 P_ ((struct coding_system *));
 417
 418 static int detect_coding_iso_2022 P_ ((struct coding_system *,
 419                                        struct coding_detection_info *info));
 420 static void decode_coding_iso_2022 P_ ((struct coding_system *));
 421 static int encode_coding_iso_2022 P_ ((struct coding_system *));
 422
 423 static int detect_coding_emacs_mule P_ ((struct coding_system *,
 424                                          struct coding_detection_info *info));
 425 static void decode_coding_emacs_mule P_ ((struct coding_system *));
 426 static int encode_coding_emacs_mule P_ ((struct coding_system *));
 427
 428 static int detect_coding_sjis P_ ((struct coding_system *,
 429                                    struct coding_detection_info *info));
 430 static void decode_coding_sjis P_ ((struct coding_system *));
 431 static int encode_coding_sjis P_ ((struct coding_system *));
 432
 433 static int detect_coding_big5 P_ ((struct coding_system *,
 434                                    struct coding_detection_info *info));
 435 static void decode_coding_big5 P_ ((struct coding_system *));
 436 static int encode_coding_big5 P_ ((struct coding_system *));
 437
 438 static int detect_coding_ccl P_ ((struct coding_system *,
 439                                   struct coding_detection_info *info));
 440 static void decode_coding_ccl P_ ((struct coding_system *));
 441 static int encode_coding_ccl P_ ((struct coding_system *));
 442
 443 static void decode_coding_raw_text P_ ((struct coding_system *));
 444 static int encode_coding_raw_text P_ ((struct coding_system *));
 445
 446
 447 /* ISO2022 section */
 448
 449 #define CODING_ISO_INITIAL(coding, reg)                 \
 450   (XINT (AREF (AREF (CODING_ID_ATTRS ((coding)->id),    \
 451                      coding_attr_iso_initial),          \
 452                reg)))
 453
 454
 455 #define CODING_ISO_REQUEST(coding, charset_id)  \
 456   ((charset_id <= (coding)->max_charset_id      \
 457     ? (coding)->safe_charsets[charset_id]       \
 458     : -1))
 459
 460
 461 #define CODING_ISO_FLAGS(coding)        \
 462   ((coding)->spec.iso_2022.flags)
 463 #define CODING_ISO_DESIGNATION(coding, reg)     \
 464   ((coding)->spec.iso_2022.current_designation[reg])
 465 #define CODING_ISO_INVOCATION(coding, plane)    \
 466   ((coding)->spec.iso_2022.current_invocation[plane])
 467 #define CODING_ISO_SINGLE_SHIFTING(coding)      \
 468   ((coding)->spec.iso_2022.single_shifting)
 469 #define CODING_ISO_BOL(coding)  \
 470   ((coding)->spec.iso_2022.bol)
 471 #define CODING_ISO_INVOKED_CHARSET(coding, plane)       \
 472   CODING_ISO_DESIGNATION ((coding), CODING_ISO_INVOCATION ((coding), (plane)))
 473
 474 /* Control characters of ISO2022.  */
 475                         /* code */      /* function */
 476 #define ISO_CODE_LF     0x0A            /* line-feed */
 477 #define ISO_CODE_CR     0x0D            /* carriage-return */
 478 #define ISO_CODE_SO     0x0E            /* shift-out */
 479 #define ISO_CODE_SI     0x0F            /* shift-in */
 480 #define ISO_CODE_SS2_7  0x19            /* single-shift-2 for 7-bit code */
 481 #define ISO_CODE_ESC    0x1B            /* escape */
 482 #define ISO_CODE_SS2    0x8E            /* single-shift-2 */
 483 #define ISO_CODE_SS3    0x8F            /* single-shift-3 */
 484 #define ISO_CODE_CSI    0x9B            /* control-sequence-introducer */
 485
 486 /* All code (1-byte) of ISO2022 is classified into one of the
 487    followings.  */
 488 enum iso_code_class_type
 489   {
 490     ISO_control_0,              /* Control codes in the range
 491                                    0x00..0x1F and 0x7F, except for the
 492                                    following 5 codes.  */
 493     ISO_carriage_return,        /* ISO_CODE_CR (0x0D) */
 494     ISO_shift_out,              /* ISO_CODE_SO (0x0E) */
 495     ISO_shift_in,               /* ISO_CODE_SI (0x0F) */
 496     ISO_single_shift_2_7,       /* ISO_CODE_SS2_7 (0x19) */
 497     ISO_escape,                 /* ISO_CODE_SO (0x1B) */
 498     ISO_control_1,              /* Control codes in the range
 499                                    0x80..0x9F, except for the
 500                                    following 3 codes.  */
 501     ISO_single_shift_2,         /* ISO_CODE_SS2 (0x8E) */
 502     ISO_single_shift_3,         /* ISO_CODE_SS3 (0x8F) */
 503     ISO_control_sequence_introducer, /* ISO_CODE_CSI (0x9B) */
 504     ISO_0x20_or_0x7F,           /* Codes of the values 0x20 or 0x7F.  */
 505     ISO_graphic_plane_0,        /* Graphic codes in the range 0x21..0x7E.  */
 506     ISO_0xA0_or_0xFF,           /* Codes of the values 0xA0 or 0xFF.  */
 507     ISO_graphic_plane_1         /* Graphic codes in the range 0xA1..0xFE.  */
 508   };
 509
 510 /** The macros CODING_ISO_FLAG_XXX defines a flag bit of the
 511     `iso-flags' attribute of an iso2022 coding system.  */
 512
 513 /* If set, produce long-form designation sequence (e.g. ESC $ ( A)
 514    instead of the correct short-form sequence (e.g. ESC $ A).  */
 515 #define CODING_ISO_FLAG_LONG_FORM       0x0001
 516
 517 /* If set, reset graphic planes and registers at end-of-line to the
 518    initial state.  */
 519 #define CODING_ISO_FLAG_RESET_AT_EOL    0x0002
 520
 521 /* If set, reset graphic planes and registers before any control
 522    characters to the initial state.  */
 523 #define CODING_ISO_FLAG_RESET_AT_CNTL   0x0004
 524
 525 /* If set, encode by 7-bit environment.  */
 526 #define CODING_ISO_FLAG_SEVEN_BITS      0x0008
 527
 528 /* If set, use locking-shift function.  */
 529 #define CODING_ISO_FLAG_LOCKING_SHIFT   0x0010
 530
 531 /* If set, use single-shift function.  Overwrite
 532    CODING_ISO_FLAG_LOCKING_SHIFT.  */
 533 #define CODING_ISO_FLAG_SINGLE_SHIFT    0x0020
 534
 535 /* If set, use designation escape sequence.  */
 536 #define CODING_ISO_FLAG_DESIGNATION     0x0040
 537
 538 /* If set, produce revision number sequence.  */
 539 #define CODING_ISO_FLAG_REVISION        0x0080
 540
 541 /* If set, produce ISO6429's direction specifying sequence.  */
 542 #define CODING_ISO_FLAG_DIRECTION       0x0100
 543
 544 /* If set, assume designation states are reset at beginning of line on
 545    output.  */
 546 #define CODING_ISO_FLAG_INIT_AT_BOL     0x0200
 547
 548 /* If set, designation sequence should be placed at beginning of line
 549    on output.  */
 550 #define CODING_ISO_FLAG_DESIGNATE_AT_BOL 0x0400
 551
 552 /* If set, do not encode unsafe charactes on output.  */
 553 #define CODING_ISO_FLAG_SAFE            0x0800
 554
 555 /* If set, extra latin codes (128..159) are accepted as a valid code
 556    on input.  */
 557 #define CODING_ISO_FLAG_LATIN_EXTRA     0x1000
 558
 559 #define CODING_ISO_FLAG_COMPOSITION     0x2000
 560
 561 #define CODING_ISO_FLAG_EUC_TW_SHIFT    0x4000
 562
 563 #define CODING_ISO_FLAG_USE_ROMAN       0x8000
 564
 565 #define CODING_ISO_FLAG_USE_OLDJIS      0x10000
 566
 567 #define CODING_ISO_FLAG_FULL_SUPPORT    0x100000
 568
 569 /* A character to be produced on output if encoding of the original
 570    character is prohibited by CODING_ISO_FLAG_SAFE.  */
 571 #define CODING_INHIBIT_CHARACTER_SUBSTITUTION  '?'
 572
 573
 574 /* UTF-16 section */
 575 #define CODING_UTF_16_BOM(coding)       \
 576   ((coding)->spec.utf_16.bom)
 577
 578 #define CODING_UTF_16_ENDIAN(coding)    \
 579   ((coding)->spec.utf_16.endian)
 580
 581 #define CODING_UTF_16_SURROGATE(coding) \
 582   ((coding)->spec.utf_16.surrogate)
 583
 584
 585 /* CCL section */
 586 #define CODING_CCL_DECODER(coding)      \
 587   AREF (CODING_ID_ATTRS ((coding)->id), coding_attr_ccl_decoder)
 588 #define CODING_CCL_ENCODER(coding)      \
 589   AREF (CODING_ID_ATTRS ((coding)->id), coding_attr_ccl_encoder)
 590 #define CODING_CCL_VALIDS(coding)                                          \
 591   (XSTRING (AREF (CODING_ID_ATTRS ((coding)->id), coding_attr_ccl_valids)) \
 592    ->data)
 593
 594 /* Index for each coding category in `coding_categories' */
 595
 596 enum coding_category
 597   {
 598     coding_category_iso_7,
 599     coding_category_iso_7_tight,
 600     coding_category_iso_8_1,
 601     coding_category_iso_8_2,
 602     coding_category_iso_7_else,
 603     coding_category_iso_8_else,
 604     coding_category_utf_8,
 605     coding_category_utf_16_auto,
 606     coding_category_utf_16_be,
 607     coding_category_utf_16_le,
 608     coding_category_utf_16_be_nosig,
 609     coding_category_utf_16_le_nosig,
 610     coding_category_charset,
 611     coding_category_sjis,
 612     coding_category_big5,
 613     coding_category_ccl,
 614     coding_category_emacs_mule,
 615     /* All above are targets of code detection.  */
 616     coding_category_raw_text,
 617     coding_category_undecided,
 618     coding_category_max
 619   };
 620
 621 /* Definitions of flag bits used in detect_coding_XXXX.  */
 622 #define CATEGORY_MASK_ISO_7             (1 << coding_category_iso_7)
 623 #define CATEGORY_MASK_ISO_7_TIGHT       (1 << coding_category_iso_7_tight)
 624 #define CATEGORY_MASK_ISO_8_1           (1 << coding_category_iso_8_1)
 625 #define CATEGORY_MASK_ISO_8_2           (1 << coding_category_iso_8_2)
 626 #define CATEGORY_MASK_ISO_7_ELSE        (1 << coding_category_iso_7_else)
 627 #define CATEGORY_MASK_ISO_8_ELSE        (1 << coding_category_iso_8_else)
 628 #define CATEGORY_MASK_UTF_8             (1 << coding_category_utf_8)
 629 #define CATEGORY_MASK_UTF_16_BE         (1 << coding_category_utf_16_be)
 630 #define CATEGORY_MASK_UTF_16_LE         (1 << coding_category_utf_16_le)
 631 #define CATEGORY_MASK_UTF_16_BE_NOSIG   (1 << coding_category_utf_16_be_nosig)
 632 #define CATEGORY_MASK_UTF_16_LE_NOSIG   (1 << coding_category_utf_16_le_nosig)
 633 #define CATEGORY_MASK_CHARSET           (1 << coding_category_charset)
 634 #define CATEGORY_MASK_SJIS              (1 << coding_category_sjis)
 635 #define CATEGORY_MASK_BIG5              (1 << coding_category_big5)
 636 #define CATEGORY_MASK_CCL               (1 << coding_category_ccl)
 637 #define CATEGORY_MASK_EMACS_MULE        (1 << coding_category_emacs_mule)
 638 #define CATEGORY_MASK_RAW_TEXT          (1 << coding_category_raw_text)
 639
 640 /* This value is returned if detect_coding_mask () find nothing other
 641    than ASCII characters.  */
 642 #define CATEGORY_MASK_ANY               \
 643   (CATEGORY_MASK_ISO_7                  \
 644    | CATEGORY_MASK_ISO_7_TIGHT          \
 645    | CATEGORY_MASK_ISO_8_1              \
 646    | CATEGORY_MASK_ISO_8_2              \
 647    | CATEGORY_MASK_ISO_7_ELSE           \
 648    | CATEGORY_MASK_ISO_8_ELSE           \
 649    | CATEGORY_MASK_UTF_8                \
 650    | CATEGORY_MASK_UTF_16_BE            \
 651    | CATEGORY_MASK_UTF_16_LE            \
 652    | CATEGORY_MASK_UTF_16_BE_NOSIG      \
 653    | CATEGORY_MASK_UTF_16_LE_NOSIG      \
 654    | CATEGORY_MASK_CHARSET              \
 655    | CATEGORY_MASK_SJIS                 \
 656    | CATEGORY_MASK_BIG5                 \
 657    | CATEGORY_MASK_CCL                  \
 658    | CATEGORY_MASK_EMACS_MULE)
 659
 660
 661 #define CATEGORY_MASK_ISO_7BIT \
 662   (CATEGORY_MASK_ISO_7 | CATEGORY_MASK_ISO_7_TIGHT)
 663
 664 #define CATEGORY_MASK_ISO_8BIT \
 665   (CATEGORY_MASK_ISO_8_1 | CATEGORY_MASK_ISO_8_2)
 666
 667 #define CATEGORY_MASK_ISO_ELSE \
 668   (CATEGORY_MASK_ISO_7_ELSE | CATEGORY_MASK_ISO_8_ELSE)
 669
 670 #define CATEGORY_MASK_ISO_ESCAPE        \
 671   (CATEGORY_MASK_ISO_7                  \
 672    | CATEGORY_MASK_ISO_7_TIGHT          \
 673    | CATEGORY_MASK_ISO_7_ELSE           \
 674    | CATEGORY_MASK_ISO_8_ELSE)
 675
 676 #define CATEGORY_MASK_ISO       \
 677   (  CATEGORY_MASK_ISO_7BIT     \
 678      | CATEGORY_MASK_ISO_8BIT   \
 679      | CATEGORY_MASK_ISO_ELSE)
 680
 681 #define CATEGORY_MASK_UTF_16            \
 682   (CATEGORY_MASK_UTF_16_BE              \
 683    | CATEGORY_MASK_UTF_16_LE            \
 684    | CATEGORY_MASK_UTF_16_BE_NOSIG      \
 685    | CATEGORY_MASK_UTF_16_LE_NOSIG)
 686
 687
 688 /* List of symbols `coding-category-xxx' ordered by priority.  This
 689    variable is exposed to Emacs Lisp.  */
 690 static Lisp_Object Vcoding_category_list;
 691
 692 /* Table of coding categories (Lisp symbols).  This variable is for
 693    internal use oly.  */
 694 static Lisp_Object Vcoding_category_table;
 695
 696 /* Table of coding-categories ordered by priority.  */
 697 static enum coding_category coding_priorities[coding_category_max];
 698
 699 /* Nth element is a coding context for the coding system bound to the
 700    Nth coding category.  */
 701 static struct coding_system coding_categories[coding_category_max];
 702
 703 static int detected_mask[coding_category_raw_text] =
 704   { CATEGORY_MASK_ISO,
 705     CATEGORY_MASK_ISO,
 706     CATEGORY_MASK_ISO,
 707     CATEGORY_MASK_ISO,
 708     CATEGORY_MASK_ISO,
 709     CATEGORY_MASK_ISO,
 710     CATEGORY_MASK_UTF_8,
 711     CATEGORY_MASK_UTF_16,
 712     CATEGORY_MASK_UTF_16,
 713     CATEGORY_MASK_UTF_16,
 714     CATEGORY_MASK_UTF_16,
 715     CATEGORY_MASK_UTF_16,
 716     CATEGORY_MASK_CHARSET,
 717     CATEGORY_MASK_SJIS,
 718     CATEGORY_MASK_BIG5,
 719     CATEGORY_MASK_CCL,
 720     CATEGORY_MASK_EMACS_MULE
 721   };
 722
 723 /*** Commonly used macros and functions ***/
 724
 725 #ifndef min
 726 #define min(a, b) ((a) < (b) ? (a) : (b))
 727 #endif
 728 #ifndef max
 729 #define max(a, b) ((a) > (b) ? (a) : (b))
 730 #endif
 731
 732 #define CODING_GET_INFO(coding, attrs, eol_type, charset_list)  \
 733   do {                                                          \
 734     attrs = CODING_ID_ATTRS (coding->id);                       \
 735     eol_type = CODING_ID_EOL_TYPE (coding->id);                 \
 736     if (VECTORP (eol_type))                                     \
 737       eol_type = Qunix;                                         \
 738     charset_list = CODING_ATTR_CHARSET_LIST (attrs);            \
 739   } while (0)
 740
 741
 742 /* Safely get one byte from the source text pointed by SRC which ends
 743    at SRC_END, and set C to that byte.  If there are not enough bytes
 744    in the source, it jumps to `no_more_source'.  The caller
 745    should declare and set these variables appropriately in advance:
 746         src, src_end, multibytep
 747 */
 748
 749 #define ONE_MORE_BYTE(c)                                        \
 750   do {                                                          \
 751     if (src == src_end)                                         \
 752       {                                                         \
 753         if (src_base < src)                                     \
 754           coding->result = CODING_RESULT_INSUFFICIENT_SRC;      \
 755         goto no_more_source;                                    \
 756       }                                                         \
 757     c = *src++;                                                 \
 758     if (multibytep && (c & 0x80))                               \
 759       {                                                         \
 760         if ((c & 0xFE) != 0xC0)                                 \
 761           error ("Undecodable char found");                     \
 762         c = ((c & 1) << 6) | *src++;                            \
 763       }                                                         \
 764     consumed_chars++;                                           \
 765   } while (0)
 766
 767
 768 #define ONE_MORE_BYTE_NO_CHECK(c)               \
 769   do {                                          \
 770     c = *src++;                                 \
 771     if (multibytep && (c & 0x80))               \
 772       {                                         \
 773         if ((c & 0xFE) != 0xC0)                 \
 774           error ("Undecodable char found");     \
 775         c = ((c & 1) << 6) | *src++;            \
 776       }                                         \
 777     consumed_chars++;                           \
 778   } while (0)
 779
 780
 781 /* Store a byte C in the place pointed by DST and increment DST to the
 782    next free point, and increment PRODUCED_CHARS.  The caller should
 783    assure that C is 0..127, and declare and set the variable `dst'
 784    appropriately in advance.
 785 */
 786
 787
 788 #define EMIT_ONE_ASCII_BYTE(c)  \
 789   do {                          \
 790     produced_chars++;           \
 791     *dst++ = (c);               \
 792   } while (0)
 793
 794
 795 /* Like EMIT_ONE_ASCII_BYTE byt store two bytes; C1 and C2.  */
 796
 797 #define EMIT_TWO_ASCII_BYTES(c1, c2)    \
 798   do {                                  \
 799     produced_chars += 2;                \
 800     *dst++ = (c1), *dst++ = (c2);       \
 801   } while (0)
 802
 803
 804 /* Store a byte C in the place pointed by DST and increment DST to the
 805    next free point, and increment PRODUCED_CHARS.  If MULTIBYTEP is
 806    nonzero, store in an appropriate multibyte from.  The caller should
 807    declare and set the variables `dst' and `multibytep' appropriately
 808    in advance.  */
 809
 810 #define EMIT_ONE_BYTE(c)                \
 811   do {                                  \
 812     produced_chars++;                   \
 813     if (multibytep)                     \
 814       {                                 \
 815         int ch = (c);                   \
 816         if (ch >= 0x80)                 \
 817           ch = BYTE8_TO_CHAR (ch);      \
 818         CHAR_STRING_ADVANCE (ch, dst);  \
 819       }                                 \
 820     else                                \
 821       *dst++ = (c);                     \
 822   } while (0)
 823
 824
 825 /* Like EMIT_ONE_BYTE, but emit two bytes; C1 and C2.  */
 826
 827 #define EMIT_TWO_BYTES(c1, c2)          \
 828   do {                                  \
 829     produced_chars += 2;                \
 830     if (multibytep)                     \
 831       {                                 \
 832         int ch;                         \
 833                                         \
 834         ch = (c1);                      \
 835         if (ch >= 0x80)                 \
 836           ch = BYTE8_TO_CHAR (ch);      \
 837         CHAR_STRING_ADVANCE (ch, dst);  \
 838         ch = (c2);                      \
 839         if (ch >= 0x80)                 \
 840           ch = BYTE8_TO_CHAR (ch);      \
 841         CHAR_STRING_ADVANCE (ch, dst);  \
 842       }                                 \
 843     else                                \
 844       {                                 \
 845         *dst++ = (c1);                  \
 846         *dst++ = (c2);                  \
 847       }                                 \
 848   } while (0)
 849
 850
 851 #define EMIT_THREE_BYTES(c1, c2, c3)    \
 852   do {                                  \
 853     EMIT_ONE_BYTE (c1);                 \
 854     EMIT_TWO_BYTES (c2, c3);            \
 855   } while (0)
 856
 857
 858 #define EMIT_FOUR_BYTES(c1, c2, c3, c4)         \
 859   do {                                          \
 860     EMIT_TWO_BYTES (c1, c2);                    \
 861     EMIT_TWO_BYTES (c3, c4);                    \
 862   } while (0)
 863
 864
 865 #define CODING_DECODE_CHAR(coding, src, src_base, src_end, charset, code, c) \
 866   do {                                                                       \
 867     charset_map_loaded = 0;                                                  \
 868     c = DECODE_CHAR (charset, code);                                         \
 869     if (charset_map_loaded)                                                  \
 870       {                                                                      \
 871         unsigned char *orig = coding->source;                                \
 872         EMACS_INT offset;                                                    \
 873                                                                              \
 874         coding_set_source (coding);                                          \
 875         offset = coding->source - orig;                                      \
 876         src += offset;                                                       \
 877         src_base += offset;                                                  \
 878         src_end += offset;                                                   \
 879       }                                                                      \
 880   } while (0)
 881
 882
 883 #define ASSURE_DESTINATION(bytes)                               \
 884   do {                                                          \
 885     if (dst + (bytes) >= dst_end)                               \
 886       {                                                         \
 887         int more_bytes = charbuf_end - charbuf + (bytes);       \
 888                                                                 \
 889         dst = alloc_destination (coding, more_bytes, dst);      \
 890         dst_end = coding->destination + coding->dst_bytes;      \
 891       }                                                         \
 892   } while (0)
 893
 894
 895
 896 static void
 897 coding_set_source (coding)
 898      struct coding_system *coding;
 899 {
 900   if (BUFFERP (coding->src_object))
 901     {
 902       struct buffer *buf = XBUFFER (coding->src_object);
 903
 904       if (coding->src_pos < 0)
 905         coding->source = BUF_GAP_END_ADDR (buf) + coding->src_pos_byte;
 906       else
 907         coding->source = BUF_BYTE_ADDRESS (buf, coding->src_pos_byte);
 908     }
 909   else if (STRINGP (coding->src_object))
 910     {
 911       coding->source = (XSTRING (coding->src_object)->data
 912                         + coding->src_pos_byte);
 913     }
 914   else
 915     /* Otherwise, the source is C string and is never relocated
 916        automatically.  Thus we don't have to update anything.  */
 917     ;
 918 }
 919
 920 static void
 921 coding_set_destination (coding)
 922      struct coding_system *coding;
 923 {
 924   if (BUFFERP (coding->dst_object))
 925     {
 926       if (coding->src_pos < 0)
 927         {
 928           coding->destination = BEG_ADDR + coding->dst_pos_byte - 1;
 929           coding->dst_bytes = (GAP_END_ADDR
 930                                - (coding->src_bytes - coding->consumed)
 931                                - coding->destination);
 932         }
 933       else
 934         {
 935           /* We are sure that coding->dst_pos_byte is before the gap
 936              of the buffer. */
 937           coding->destination = (BUF_BEG_ADDR (XBUFFER (coding->dst_object))
 938                                  + coding->dst_pos_byte - 1);
 939           coding->dst_bytes = (BUF_GAP_END_ADDR (XBUFFER (coding->dst_object))
 940                                - coding->destination);
 941         }
 942     }
 943   else
 944     /* Otherwise, the destination is C string and is never relocated
 945        automatically.  Thus we don't have to update anything.  */
 946     ;
 947 }
 948
 949
 950 static void
 951 coding_alloc_by_realloc (coding, bytes)
 952      struct coding_system *coding;
 953      EMACS_INT bytes;
 954 {
 955   coding->destination = (unsigned char *) xrealloc (coding->destination,
 956                                                     coding->dst_bytes + bytes);
 957   coding->dst_bytes += bytes;
 958 }
 959
 960 static void
 961 coding_alloc_by_making_gap (coding, bytes)
 962      struct coding_system *coding;
 963      EMACS_INT bytes;
 964 {
 965   if (BUFFERP (coding->dst_object)
 966       && EQ (coding->src_object, coding->dst_object))
 967     {
 968       EMACS_INT add = coding->src_bytes - coding->consumed;
 969
 970       GAP_SIZE -= add; ZV += add; Z += add; ZV_BYTE += add; Z_BYTE += add;
 971       make_gap (bytes);
 972       GAP_SIZE += add; ZV -= add; Z -= add; ZV_BYTE -= add; Z_BYTE -= add;
 973     }
 974   else
 975     {
 976       Lisp_Object this_buffer;
 977
 978       this_buffer = Fcurrent_buffer ();
 979       set_buffer_internal (XBUFFER (coding->dst_object));
 980       make_gap (bytes);
 981       set_buffer_internal (XBUFFER (this_buffer));
 982     }
 983 }
 984
 985
 986 static unsigned char *
 987 alloc_destination (coding, nbytes, dst)
 988      struct coding_system *coding;
 989      int nbytes;
 990      unsigned char *dst;
 991 {
 992   EMACS_INT offset = dst - coding->destination;
 993
 994   if (BUFFERP (coding->dst_object))
 995     coding_alloc_by_making_gap (coding, nbytes);
 996   else
 997     coding_alloc_by_realloc (coding, nbytes);
 998   coding->result = CODING_RESULT_SUCCESS;
 999   coding_set_destination (coding);
1000   dst = coding->destination + offset;
1001   return dst;
1002 }
1003
1004 /** Macros for annotations.  */
1005
1006 /* Maximum length of annotation data (sum of annotations for
1007    composition and charset).  */
1008 #define MAX_ANNOTATION_LENGTH (5 + (MAX_COMPOSITION_COMPONENTS * 2) - 1 + 5)
1009
1010 /* An annotation data is stored in the array coding->charbuf in this
1011    format:
1012      [ -LENGTH ANNOTATION_MASK FROM TO ... ]
1013    LENGTH is the number of elements in the annotation.
1014    ANNOTATION_MASK is one of CODING_ANNOTATE_XXX_MASK.
1015    FROM and TO specify the range of text annotated.  They are relative
1016    to coding->src_pos (on encoding) or coding->dst_pos (on decoding).
1017
1018    The format of the following elements depend on ANNOTATION_MASK.
1019
1020    In the case of CODING_ANNOTATE_COMPOSITION_MASK, these elements
1021    follows:
1022      ... METHOD [ COMPOSITION-COMPONENTS ... ]
1023    METHOD is one of enum composition_method.
1024    Optionnal COMPOSITION-COMPONENTS are characters and composition
1025    rules.
1026
1027    In the case of CODING_ANNOTATE_CHARSET_MASK, one element CHARSET-ID
1028    follows.  */
1029
1030 #define ADD_ANNOTATION_DATA(buf, len, mask, from, to)   \
1031   do {                                                  \
1032     *(buf)++ = -(len);                                  \
1033     *(buf)++ = (mask);                                  \
1034     *(buf)++ = (from);                                  \
1035     *(buf)++ = (to);                                    \
1036     coding->annotated = 1;                              \
1037   } while (0);
1038
1039 #define ADD_COMPOSITION_DATA(buf, from, to, method)                           \
1040   do {                                                                        \
1041     ADD_ANNOTATION_DATA (buf, 5, CODING_ANNOTATE_COMPOSITION_MASK, from, to); \
1042     *buf++ = method;                                                          \
1043   } while (0)
1044
1045
1046 #define ADD_CHARSET_DATA(buf, from, to, id)                               \
1047   do {                                                                    \
1048     ADD_ANNOTATION_DATA (buf, 5, CODING_ANNOTATE_CHARSET_MASK, from, to); \
1049     *buf++ = id;                                                          \
1050   } while (0)
1051
1052 \f
1053 /*** 2. Emacs' internal format (emacs-utf-8) ***/
1054
1055
1056
1057 \f
1058 /*** 3. UTF-8 ***/
1059
1060 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
1061    Check if a text is encoded in UTF-8.  If it is, return 1, else
1062    return 0.  */
1063
1064 #define UTF_8_1_OCTET_P(c)         ((c) < 0x80)
1065 #define UTF_8_EXTRA_OCTET_P(c)     (((c) & 0xC0) == 0x80)
1066 #define UTF_8_2_OCTET_LEADING_P(c) (((c) & 0xE0) == 0xC0)
1067 #define UTF_8_3_OCTET_LEADING_P(c) (((c) & 0xF0) == 0xE0)
1068 #define UTF_8_4_OCTET_LEADING_P(c) (((c) & 0xF8) == 0xF0)
1069 #define UTF_8_5_OCTET_LEADING_P(c) (((c) & 0xFC) == 0xF8)
1070
1071 static int
1072 detect_coding_utf_8 (coding, detect_info)
1073      struct coding_system *coding;
1074      struct coding_detection_info *detect_info;
1075 {
1076   unsigned char *src = coding->source, *src_base = src;
1077   unsigned char *src_end = coding->source + coding->src_bytes;
1078   int multibytep = coding->src_multibyte;
1079   int consumed_chars = 0;
1080   int found = 0;
1081   int incomplete;
1082
1083   detect_info->checked |= CATEGORY_MASK_UTF_8;
1084   /* A coding system of this category is always ASCII compatible.  */
1085   src += coding->head_ascii;
1086
1087   while (1)
1088     {
1089       int c, c1, c2, c3, c4;
1090
1091       incomplete = 0;
1092       ONE_MORE_BYTE (c);
1093       if (UTF_8_1_OCTET_P (c))
1094         continue;
1095       incomplete = 1;
1096       ONE_MORE_BYTE (c1);
1097       if (! UTF_8_EXTRA_OCTET_P (c1))
1098         break;
1099       if (UTF_8_2_OCTET_LEADING_P (c))
1100         {
1101           found = CATEGORY_MASK_UTF_8;
1102           continue;
1103         }
1104       ONE_MORE_BYTE (c2);
1105       if (! UTF_8_EXTRA_OCTET_P (c2))
1106         break;
1107       if (UTF_8_3_OCTET_LEADING_P (c))
1108         {
1109           found = CATEGORY_MASK_UTF_8;
1110           continue;
1111         }
1112       ONE_MORE_BYTE (c3);
1113       if (! UTF_8_EXTRA_OCTET_P (c3))
1114         break;
1115       if (UTF_8_4_OCTET_LEADING_P (c))
1116         {
1117           found = CATEGORY_MASK_UTF_8;
1118           continue;
1119         }
1120       ONE_MORE_BYTE (c4);
1121       if (! UTF_8_EXTRA_OCTET_P (c4))
1122         break;
1123       if (UTF_8_5_OCTET_LEADING_P (c))
1124         {
1125           found = CATEGORY_MASK_UTF_8;
1126           continue;
1127         }
1128       break;
1129     }
1130   detect_info->rejected |= CATEGORY_MASK_UTF_8;
1131   return 0;
1132
1133  no_more_source:
1134   if (incomplete && coding->mode & CODING_MODE_LAST_BLOCK)
1135     {
1136       detect_info->rejected |= CATEGORY_MASK_UTF_8;
1137       return 0;
1138     }
1139   detect_info->found |= found;
1140   return 1;
1141 }
1142
1143
1144 static void
1145 decode_coding_utf_8 (coding)
1146      struct coding_system *coding;
1147 {
1148   unsigned char *src = coding->source + coding->consumed;
1149   unsigned char *src_end = coding->source + coding->src_bytes;
1150   unsigned char *src_base;
1151   int *charbuf = coding->charbuf;
1152   int *charbuf_end = charbuf + coding->charbuf_size;
1153   int consumed_chars = 0, consumed_chars_base;
1154   int multibytep = coding->src_multibyte;
1155   Lisp_Object attr, eol_type, charset_list;
1156
1157   CODING_GET_INFO (coding, attr, eol_type, charset_list);
1158
1159   while (1)
1160     {
1161       int c, c1, c2, c3, c4, c5;
1162
1163       src_base = src;
1164       consumed_chars_base = consumed_chars;
1165
1166       if (charbuf >= charbuf_end)
1167         break;
1168
1169       ONE_MORE_BYTE (c1);
1170       if (UTF_8_1_OCTET_P(c1))
1171         {
1172           c = c1;
1173           if (c == '\r')
1174             {
1175               if (EQ (eol_type, Qdos))
1176                 {
1177                   if (src == src_end)
1178                     {
1179                       coding->result = CODING_RESULT_INSUFFICIENT_SRC;
1180                       goto no_more_source;
1181                     }
1182                   if (*src == '\n')
1183                     ONE_MORE_BYTE (c);
1184                 }
1185               else if (EQ (eol_type, Qmac))
1186                 c = '\n';
1187             }
1188         }
1189       else
1190         {
1191           ONE_MORE_BYTE (c2);
1192           if (! UTF_8_EXTRA_OCTET_P (c2))
1193             goto invalid_code;
1194           if (UTF_8_2_OCTET_LEADING_P (c1))
1195             {
1196               c = ((c1 & 0x1F) << 6) | (c2 & 0x3F);
1197               /* Reject overlong sequences here and below.  Encoders
1198                  producing them are incorrect, they can be misleading,
1199                  and they mess up read/write invariance.  */
1200               if (c < 128)
1201                 goto invalid_code;
1202             }
1203           else
1204             {
1205               ONE_MORE_BYTE (c3);
1206               if (! UTF_8_EXTRA_OCTET_P (c3))
1207                 goto invalid_code;
1208               if (UTF_8_3_OCTET_LEADING_P (c1))
1209                 {
1210                   c = (((c1 & 0xF) << 12)
1211                        | ((c2 & 0x3F) << 6) | (c3 & 0x3F));
1212                   if (c < 0x800
1213                       || (c >= 0xd800 && c < 0xe000)) /* surrogates (invalid) */
1214                     goto invalid_code;
1215                 }
1216               else
1217                 {
1218                   ONE_MORE_BYTE (c4);
1219                   if (! UTF_8_EXTRA_OCTET_P (c4))
1220                     goto invalid_code;
1221                   if (UTF_8_4_OCTET_LEADING_P (c1))
1222                     {
1223                     c = (((c1 & 0x7) << 18) | ((c2 & 0x3F) << 12)
1224                          | ((c3 & 0x3F) << 6) | (c4 & 0x3F));
1225                     if (c < 0x10000)
1226                       goto invalid_code;
1227                     }
1228                   else
1229                     {
1230                       ONE_MORE_BYTE (c5);
1231                       if (! UTF_8_EXTRA_OCTET_P (c5))
1232                         goto invalid_code;
1233                       if (UTF_8_5_OCTET_LEADING_P (c1))
1234                         {
1235                           c = (((c1 & 0x3) << 24) | ((c2 & 0x3F) << 18)
1236                                | ((c3 & 0x3F) << 12) | ((c4 & 0x3F) << 6)
1237                                | (c5 & 0x3F));
1238                           if ((c > MAX_CHAR) || (c < 0x200000))
1239                             goto invalid_code;
1240                         }
1241                       else
1242                         goto invalid_code;
1243                     }
1244                 }
1245             }
1246         }
1247
1248       *charbuf++ = c;
1249       continue;
1250
1251     invalid_code:
1252       src = src_base;
1253       consumed_chars = consumed_chars_base;
1254       ONE_MORE_BYTE (c);
1255       *charbuf++ = ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
1256       coding->errors++;
1257     }
1258
1259  no_more_source:
1260   coding->consumed_char += consumed_chars_base;
1261   coding->consumed = src_base - coding->source;
1262   coding->charbuf_used = charbuf - coding->charbuf;
1263 }
1264
1265
1266 static int
1267 encode_coding_utf_8 (coding)
1268      struct coding_system *coding;
1269 {
1270   int multibytep = coding->dst_multibyte;
1271   int *charbuf = coding->charbuf;
1272   int *charbuf_end = charbuf + coding->charbuf_used;
1273   unsigned char *dst = coding->destination + coding->produced;
1274   unsigned char *dst_end = coding->destination + coding->dst_bytes;
1275   int produced_chars = 0;
1276   int c;
1277
1278   if (multibytep)
1279     {
1280       int safe_room = MAX_MULTIBYTE_LENGTH * 2;
1281
1282       while (charbuf < charbuf_end)
1283         {
1284           unsigned char str[MAX_MULTIBYTE_LENGTH], *p, *pend = str;
1285
1286           ASSURE_DESTINATION (safe_room);
1287           c = *charbuf++;
1288           if (CHAR_BYTE8_P (c))
1289             {
1290               c = CHAR_TO_BYTE8 (c);
1291               EMIT_ONE_BYTE (c);
1292             }
1293           else
1294             {
1295               CHAR_STRING_ADVANCE (c, pend);
1296               for (p = str; p < pend; p++)
1297                 EMIT_ONE_BYTE (*p);
1298             }
1299         }
1300     }
1301   else
1302     {
1303       int safe_room = MAX_MULTIBYTE_LENGTH;
1304
1305       while (charbuf < charbuf_end)
1306         {
1307           ASSURE_DESTINATION (safe_room);
1308           c = *charbuf++;
1309           dst += CHAR_STRING (c, dst);
1310           produced_chars++;
1311         }
1312     }
1313   coding->result = CODING_RESULT_SUCCESS;
1314   coding->produced_char += produced_chars;
1315   coding->produced = dst - coding->destination;
1316   return 0;
1317 }
1318
1319
1320 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
1321    Check if a text is encoded in one of UTF-16 based coding systems.
1322    If it is, return 1, else return 0.  */
1323
1324 #define UTF_16_HIGH_SURROGATE_P(val) \
1325   (((val) & 0xFC00) == 0xD800)
1326
1327 #define UTF_16_LOW_SURROGATE_P(val) \
1328   (((val) & 0xFC00) == 0xDC00)
1329
1330 #define UTF_16_INVALID_P(val)   \
1331   (((val) == 0xFFFE)            \
1332    || ((val) == 0xFFFF)         \
1333    || UTF_16_LOW_SURROGATE_P (val))
1334
1335
1336 static int
1337 detect_coding_utf_16 (coding, detect_info)
1338      struct coding_system *coding;
1339      struct coding_detection_info *detect_info;
1340 {
1341   unsigned char *src = coding->source, *src_base = src;
1342   unsigned char *src_end = coding->source + coding->src_bytes;
1343   int multibytep = coding->src_multibyte;
1344   int consumed_chars = 0;
1345   int c1, c2;
1346
1347   detect_info->checked |= CATEGORY_MASK_UTF_16;
1348
1349   if (coding->mode & CODING_MODE_LAST_BLOCK
1350       && (coding->src_bytes & 1))
1351     {
1352       detect_info->rejected |= CATEGORY_MASK_UTF_16;
1353       return 0;
1354     }
1355   ONE_MORE_BYTE (c1);
1356   ONE_MORE_BYTE (c2);
1357
1358   if ((c1 == 0xFF) && (c2 == 0xFE))
1359     {
1360       detect_info->found |= CATEGORY_MASK_UTF_16_LE;
1361       detect_info->rejected |= CATEGORY_MASK_UTF_16_BE;
1362     }
1363   else if ((c1 == 0xFE) && (c2 == 0xFF))
1364     {
1365       detect_info->found |= CATEGORY_MASK_UTF_16_BE;
1366       detect_info->rejected |= CATEGORY_MASK_UTF_16_LE;
1367     }
1368  no_more_source:
1369   return 1;
1370 }
1371
1372 static void
1373 decode_coding_utf_16 (coding)
1374      struct coding_system *coding;
1375 {
1376   unsigned char *src = coding->source + coding->consumed;
1377   unsigned char *src_end = coding->source + coding->src_bytes;
1378   unsigned char *src_base;
1379   int *charbuf = coding->charbuf;
1380   int *charbuf_end = charbuf + coding->charbuf_size;
1381   int consumed_chars = 0, consumed_chars_base;
1382   int multibytep = coding->src_multibyte;
1383   enum utf_16_bom_type bom = CODING_UTF_16_BOM (coding);
1384   enum utf_16_endian_type endian = CODING_UTF_16_ENDIAN (coding);
1385   int surrogate = CODING_UTF_16_SURROGATE (coding);
1386   Lisp_Object attr, eol_type, charset_list;
1387
1388   CODING_GET_INFO (coding, attr, eol_type, charset_list);
1389
1390   if (bom != utf_16_without_bom)
1391     {
1392       int c, c1, c2;
1393
1394       src_base = src;
1395       ONE_MORE_BYTE (c1);
1396       ONE_MORE_BYTE (c2);
1397       c = (c1 << 8) | c2;
1398       if (bom == utf_16_with_bom)
1399         {
1400           if (endian == utf_16_big_endian
1401               ? c != 0xFEFF : c != 0xFFFE)
1402             {
1403               /* We are sure that there's enouph room at CHARBUF.  */
1404               *charbuf++ = c1;
1405               *charbuf++ = c2;
1406               coding->errors++;
1407             }
1408         }
1409       else
1410         {
1411           if (c == 0xFEFF)
1412             CODING_UTF_16_ENDIAN (coding)
1413               = endian = utf_16_big_endian;
1414           else if (c == 0xFFFE)
1415             CODING_UTF_16_ENDIAN (coding)
1416               = endian = utf_16_little_endian;
1417           else
1418             {
1419               CODING_UTF_16_ENDIAN (coding)
1420                 = endian = utf_16_big_endian;
1421               src = src_base;
1422             }
1423         }
1424       CODING_UTF_16_BOM (coding) = utf_16_with_bom;
1425     }
1426
1427   while (1)
1428     {
1429       int c, c1, c2;
1430
1431       src_base = src;
1432       consumed_chars_base = consumed_chars;
1433
1434       if (charbuf + 2 >= charbuf_end)
1435         break;
1436
1437       ONE_MORE_BYTE (c1);
1438       ONE_MORE_BYTE (c2);
1439       c = (endian == utf_16_big_endian
1440            ? ((c1 << 8) | c2) : ((c2 << 8) | c1));
1441       if (surrogate)
1442         {
1443           if (! UTF_16_LOW_SURROGATE_P (c))
1444             {
1445               if (endian == utf_16_big_endian)
1446                 c1 = surrogate >> 8, c2 = surrogate & 0xFF;
1447               else
1448                 c1 = surrogate & 0xFF, c2 = surrogate >> 8;
1449               *charbuf++ = c1;
1450               *charbuf++ = c2;
1451               coding->errors++;
1452               if (UTF_16_HIGH_SURROGATE_P (c))
1453                 CODING_UTF_16_SURROGATE (coding) = surrogate = c;
1454               else
1455                 *charbuf++ = c;
1456             }
1457           else
1458             {
1459               c = ((surrogate - 0xD800) << 10) | (c - 0xDC00);
1460               CODING_UTF_16_SURROGATE (coding) = surrogate = 0;
1461               *charbuf++ = c;
1462             }
1463         }
1464       else
1465         {
1466           if (UTF_16_HIGH_SURROGATE_P (c))
1467             CODING_UTF_16_SURROGATE (coding) = surrogate = c;
1468           else
1469             *charbuf++ = c;
1470         }
1471     }
1472
1473  no_more_source:
1474   coding->consumed_char += consumed_chars_base;
1475   coding->consumed = src_base - coding->source;
1476   coding->charbuf_used = charbuf - coding->charbuf;
1477 }
1478
1479 static int
1480 encode_coding_utf_16 (coding)
1481      struct coding_system *coding;
1482 {
1483   int multibytep = coding->dst_multibyte;
1484   int *charbuf = coding->charbuf;
1485   int *charbuf_end = charbuf + coding->charbuf_used;
1486   unsigned char *dst = coding->destination + coding->produced;
1487   unsigned char *dst_end = coding->destination + coding->dst_bytes;
1488   int safe_room = 8;
1489   enum utf_16_bom_type bom = CODING_UTF_16_BOM (coding);
1490   int big_endian = CODING_UTF_16_ENDIAN (coding) == utf_16_big_endian;
1491   int produced_chars = 0;
1492   Lisp_Object attrs, eol_type, charset_list;
1493   int c;
1494
1495   CODING_GET_INFO (coding, attrs, eol_type, charset_list);
1496
1497   if (bom == utf_16_with_bom)
1498     {
1499       ASSURE_DESTINATION (safe_room);
1500       if (big_endian)
1501         EMIT_TWO_BYTES (0xFE, 0xFF);
1502       else
1503         EMIT_TWO_BYTES (0xFF, 0xFE);
1504       CODING_UTF_16_BOM (coding) = utf_16_without_bom;
1505     }
1506
1507   while (charbuf < charbuf_end)
1508     {
1509       ASSURE_DESTINATION (safe_room);
1510       c = *charbuf++;
1511       if (c >= MAX_UNICODE_CHAR)
1512         c = coding->default_char;
1513
1514       if (c < 0x10000)
1515         {
1516           if (big_endian)
1517             EMIT_TWO_BYTES (c >> 8, c & 0xFF);
1518           else
1519             EMIT_TWO_BYTES (c & 0xFF, c >> 8);
1520         }
1521       else
1522         {
1523           int c1, c2;
1524
1525           c -= 0x10000;
1526           c1 = (c >> 10) + 0xD800;
1527           c2 = (c & 0x3FF) + 0xDC00;
1528           if (big_endian)
1529             EMIT_FOUR_BYTES (c1 >> 8, c1 & 0xFF, c2 >> 8, c2 & 0xFF);
1530           else
1531             EMIT_FOUR_BYTES (c1 & 0xFF, c1 >> 8, c2 & 0xFF, c2 >> 8);
1532         }
1533     }
1534   coding->result = CODING_RESULT_SUCCESS;
1535   coding->produced = dst - coding->destination;
1536   coding->produced_char += produced_chars;
1537   return 0;
1538 }
1539
1540 \f
1541 /*** 6. Old Emacs' internal format (emacs-mule) ***/
1542
1543 /* Emacs' internal format for representation of multiple character
1544    sets is a kind of multi-byte encoding, i.e. characters are
1545    represented by variable-length sequences of one-byte codes.
1546
1547    ASCII characters and control characters (e.g. `tab', `newline') are
1548    represented by one-byte sequences which are their ASCII codes, in
1549    the range 0x00 through 0x7F.
1550
1551    8-bit characters of the range 0x80..0x9F are represented by
1552    two-byte sequences of LEADING_CODE_8_BIT_CONTROL and (their 8-bit
1553    code + 0x20).
1554
1555    8-bit characters of the range 0xA0..0xFF are represented by
1556    one-byte sequences which are their 8-bit code.
1557
1558    The other characters are represented by a sequence of `base
1559    leading-code', optional `extended leading-code', and one or two
1560    `position-code's.  The length of the sequence is determined by the
1561    base leading-code.  Leading-code takes the range 0x81 through 0x9D,
1562    whereas extended leading-code and position-code take the range 0xA0
1563    through 0xFF.  See `charset.h' for more details about leading-code
1564    and position-code.
1565
1566    --- CODE RANGE of Emacs' internal format ---
1567    character set        range
1568    -------------        -----
1569    ascii                0x00..0x7F
1570    eight-bit-control    LEADING_CODE_8_BIT_CONTROL + 0xA0..0xBF
1571    eight-bit-graphic    0xA0..0xBF
1572    ELSE                 0x81..0x9D + [0xA0..0xFF]+
1573    ---------------------------------------------
1574
1575    As this is the internal character representation, the format is
1576    usually not used externally (i.e. in a file or in a data sent to a
1577    process).  But, it is possible to have a text externally in this
1578    format (i.e. by encoding by the coding system `emacs-mule').
1579
1580    In that case, a sequence of one-byte codes has a slightly different
1581    form.
1582
1583    At first, all characters in eight-bit-control are represented by
1584    one-byte sequences which are their 8-bit code.
1585
1586    Next, character composition data are represented by the byte
1587    sequence of the form: 0x80 METHOD BYTES CHARS COMPONENT ...,
1588    where,
1589         METHOD is 0xF0 plus one of composition method (enum
1590         composition_method),
1591
1592         BYTES is 0xA0 plus a byte length of this composition data,
1593
1594         CHARS is 0x20 plus a number of characters composed by this
1595         data,
1596
1597         COMPONENTs are characters of multibye form or composition
1598         rules encoded by two-byte of ASCII codes.
1599
1600    In addition, for backward compatibility, the following formats are
1601    also recognized as composition data on decoding.
1602
1603    0x80 MSEQ ...
1604    0x80 0xFF MSEQ RULE MSEQ RULE ... MSEQ
1605
1606    Here,
1607         MSEQ is a multibyte form but in these special format:
1608           ASCII: 0xA0 ASCII_CODE+0x80,
1609           other: LEADING_CODE+0x20 FOLLOWING-BYTE ...,
1610         RULE is a one byte code of the range 0xA0..0xF0 that
1611         represents a composition rule.
1612   */
1613
1614 char emacs_mule_bytes[256];
1615
1616 int
1617 emacs_mule_char (coding, src, nbytes, nchars, id)
1618      struct coding_system *coding;
1619      unsigned char *src;
1620      int *nbytes, *nchars, *id;
1621 {
1622   unsigned char *src_end = coding->source + coding->src_bytes;
1623   int multibytep = coding->src_multibyte;
1624   unsigned char *src_base = src;
1625   struct charset *charset;
1626   unsigned code;
1627   int c;
1628   int consumed_chars = 0;
1629
1630   ONE_MORE_BYTE (c);
1631   switch (emacs_mule_bytes[c])
1632     {
1633     case 2:
1634       if (! (charset = emacs_mule_charset[c]))
1635         goto invalid_code;
1636       ONE_MORE_BYTE (c);
1637       code = c & 0x7F;
1638       break;
1639
1640     case 3:
1641       if (c == EMACS_MULE_LEADING_CODE_PRIVATE_11
1642           || c == EMACS_MULE_LEADING_CODE_PRIVATE_12)
1643         {
1644           ONE_MORE_BYTE (c);
1645           if (! (charset = emacs_mule_charset[c]))
1646             goto invalid_code;
1647           ONE_MORE_BYTE (c);
1648           code = c & 0x7F;
1649         }
1650       else
1651         {
1652           if (! (charset = emacs_mule_charset[c]))
1653             goto invalid_code;
1654           ONE_MORE_BYTE (c);
1655           code = (c & 0x7F) << 8;
1656           ONE_MORE_BYTE (c);
1657           code |= c & 0x7F;
1658         }
1659       break;
1660
1661     case 4:
1662       ONE_MORE_BYTE (c);
1663       if (! (charset = emacs_mule_charset[c]))
1664         goto invalid_code;
1665       ONE_MORE_BYTE (c);
1666       code = (c & 0x7F) << 8;
1667       ONE_MORE_BYTE (c);
1668       code |= c & 0x7F;
1669       break;
1670
1671     case 1:
1672       code = c;
1673       charset = CHARSET_FROM_ID (ASCII_BYTE_P (code)
1674                                  ? charset_ascii : charset_eight_bit);
1675       break;
1676
1677     default:
1678       abort ();
1679     }
1680   c = DECODE_CHAR (charset, code);
1681   if (c < 0)
1682     goto invalid_code;
1683   *nbytes = src - src_base;
1684   *nchars = consumed_chars;
1685   if (id)
1686     *id = charset->id;
1687   return c;
1688
1689  no_more_source:
1690   return -2;
1691
1692  invalid_code:
1693   return -1;
1694 }
1695
1696
1697 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
1698    Check if a text is encoded in `emacs-mule'.  If it is, return 1,
1699    else return 0.  */
1700
1701 static int
1702 detect_coding_emacs_mule (coding, detect_info)
1703      struct coding_system *coding;
1704      struct coding_detection_info *detect_info;
1705 {
1706   unsigned char *src = coding->source, *src_base = src;
1707   unsigned char *src_end = coding->source + coding->src_bytes;
1708   int multibytep = coding->src_multibyte;
1709   int consumed_chars = 0;
1710   int c;
1711   int found = 0;
1712   int incomplete;
1713
1714   detect_info->checked |= CATEGORY_MASK_EMACS_MULE;
1715   /* A coding system of this category is always ASCII compatible.  */
1716   src += coding->head_ascii;
1717
1718   while (1)
1719     {
1720       incomplete = 0;
1721       ONE_MORE_BYTE (c);
1722       incomplete = 1;
1723
1724       if (c == 0x80)
1725         {
1726           /* Perhaps the start of composite character.  We simple skip
1727              it because analyzing it is too heavy for detecting.  But,
1728              at least, we check that the composite character
1729              constitues of more than 4 bytes.  */
1730           unsigned char *src_base;
1731
1732         repeat:
1733           src_base = src;
1734           do
1735             {
1736               ONE_MORE_BYTE (c);
1737             }
1738           while (c >= 0xA0);
1739
1740           if (src - src_base <= 4)
1741             break;
1742           found = CATEGORY_MASK_EMACS_MULE;
1743           if (c == 0x80)
1744             goto repeat;
1745         }
1746
1747       if (c < 0x80)
1748         {
1749           if (c < 0x20
1750               && (c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO))
1751             break;
1752         }
1753       else
1754         {
1755           unsigned char *src_base = src - 1;
1756
1757           do
1758             {
1759               ONE_MORE_BYTE (c);
1760             }
1761           while (c >= 0xA0);
1762           if (src - src_base != emacs_mule_bytes[*src_base])
1763             break;
1764           found = CATEGORY_MASK_EMACS_MULE;
1765         }
1766     }
1767   detect_info->rejected |= CATEGORY_MASK_EMACS_MULE;
1768   return 0;
1769
1770  no_more_source:
1771   if (incomplete && coding->mode & CODING_MODE_LAST_BLOCK)
1772     {
1773       detect_info->rejected |= CATEGORY_MASK_EMACS_MULE;
1774       return 0;
1775     }
1776   detect_info->found |= found;
1777   return 1;
1778 }
1779
1780
1781 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".  */
1782
1783 /* Decode a character represented as a component of composition
1784    sequence of Emacs 20/21 style at SRC.  Set C to that character and
1785    update SRC to the head of next character (or an encoded composition
1786    rule).  If SRC doesn't points a composition component, set C to -1.
1787    If SRC points an invalid byte sequence, global exit by a return
1788    value 0.  */
1789
1790 #define DECODE_EMACS_MULE_COMPOSITION_CHAR(buf)                 \
1791   if (1)                                                        \
1792     {                                                           \
1793       int c;                                                    \
1794       int nbytes, nchars;                                       \
1795                                                                 \
1796       if (src == src_end)                                       \
1797         break;                                                  \
1798       c = emacs_mule_char (coding, src, &nbytes, &nchars, NULL);\
1799       if (c < 0)                                                \
1800         {                                                       \
1801           if (c == -2)                                          \
1802             break;                                              \
1803           goto invalid_code;                                    \
1804         }                                                       \
1805       *buf++ = c;                                               \
1806       src += nbytes;                                            \
1807       consumed_chars += nchars;                                 \
1808     }                                                           \
1809   else
1810
1811
1812 /* Decode a composition rule represented as a component of composition
1813    sequence of Emacs 20 style at SRC.  Store the decoded rule in *BUF,
1814    and increment BUF.  If SRC points an invalid byte sequence, set C
1815    to -1.  */
1816
1817 #define DECODE_EMACS_MULE_COMPOSITION_RULE_20(buf)      \
1818   do {                                                  \
1819     int c, gref, nref;                                  \
1820                                                         \
1821     if (src >= src_end)                                 \
1822       goto invalid_code;                                \
1823     ONE_MORE_BYTE_NO_CHECK (c);                         \
1824     c -= 0x20;                                          \
1825     if (c < 0 || c >= 81)                               \
1826       goto invalid_code;                                \
1827                                                         \
1828     gref = c / 9, nref = c % 9;                         \
1829     *buf++ = COMPOSITION_ENCODE_RULE (gref, nref);      \
1830   } while (0)
1831
1832
1833 /* Decode a composition rule represented as a component of composition
1834    sequence of Emacs 21 style at SRC.  Store the decoded rule in *BUF,
1835    and increment BUF.  If SRC points an invalid byte sequence, set C
1836    to -1.  */
1837
1838 #define DECODE_EMACS_MULE_COMPOSITION_RULE_21(buf)      \
1839   do {                                                  \
1840     int gref, nref;                                     \
1841                                                         \
1842     if (src + 1>= src_end)                              \
1843       goto invalid_code;                                \
1844     ONE_MORE_BYTE_NO_CHECK (gref);                      \
1845     gref -= 0x20;                                       \
1846     ONE_MORE_BYTE_NO_CHECK (nref);                      \
1847     nref -= 0x20;                                       \
1848     if (gref < 0 || gref >= 81                          \
1849         || nref < 0 || nref >= 81)                      \
1850       goto invalid_code;                                \
1851     *buf++ = COMPOSITION_ENCODE_RULE (gref, nref);      \
1852   } while (0)
1853
1854
1855 #define DECODE_EMACS_MULE_21_COMPOSITION(c)                             \
1856   do {                                                                  \
1857     /* Emacs 21 style format.  The first three bytes at SRC are         \
1858        (METHOD - 0xF2), (BYTES - 0xA0), (CHARS - 0xA0), where BYTES is  \
1859        the byte length of this composition information, CHARS is the    \
1860        number of characters composed by this composition.  */           \
1861     enum composition_method method = c - 0xF2;                          \
1862     int *charbuf_base = charbuf;                                        \
1863     int from, to;                                                       \
1864     int consumed_chars_limit;                                           \
1865     int nbytes, nchars;                                                 \
1866                                                                         \
1867     ONE_MORE_BYTE (c);                                                  \
1868     nbytes = c - 0xA0;                                                  \
1869     if (nbytes < 3)                                                     \
1870       goto invalid_code;                                                \
1871     ONE_MORE_BYTE (c);                                                  \
1872     nchars = c - 0xA0;                                                  \
1873     from = coding->produced + char_offset;                              \
1874     to = from + nchars;                                                 \
1875     ADD_COMPOSITION_DATA (charbuf, from, to, method);                   \
1876     consumed_chars_limit = consumed_chars_base + nbytes;                \
1877     if (method != COMPOSITION_RELATIVE)                                 \
1878       {                                                                 \
1879         int i = 0;                                                      \
1880         while (consumed_chars < consumed_chars_limit)                   \
1881           {                                                             \
1882             if (i % 2 && method != COMPOSITION_WITH_ALTCHARS)           \
1883               DECODE_EMACS_MULE_COMPOSITION_RULE_21 (charbuf);          \
1884             else                                                        \
1885               DECODE_EMACS_MULE_COMPOSITION_CHAR (charbuf);             \
1886             i++;                                                        \
1887           }                                                             \
1888         if (consumed_chars < consumed_chars_limit)                      \
1889           goto invalid_code;                                            \
1890         charbuf_base[0] -= i;                                           \
1891       }                                                                 \
1892   } while (0)
1893
1894
1895 #define DECODE_EMACS_MULE_20_RELATIVE_COMPOSITION(c)            \
1896   do {                                                          \
1897     /* Emacs 20 style format for relative composition.  */      \
1898     /* Store multibyte form of characters to be composed.  */   \
1899     enum composition_method method = COMPOSITION_RELATIVE;      \
1900     int components[MAX_COMPOSITION_COMPONENTS * 2 - 1];         \
1901     int *buf = components;                                      \
1902     int i, j;                                                   \
1903     int from, to;                                               \
1904                                                                 \
1905     src = src_base;                                             \
1906     ONE_MORE_BYTE (c);          /* skip 0x80 */                 \
1907     for (i = 0; i < MAX_COMPOSITION_COMPONENTS; i++)            \
1908       DECODE_EMACS_MULE_COMPOSITION_CHAR (buf);                 \
1909     if (i < 2)                                                  \
1910       goto invalid_code;                                        \
1911     from = coding->produced_char + char_offset;                 \
1912     to = from + i;                                              \
1913     ADD_COMPOSITION_DATA (charbuf, from, to, method);           \
1914     for (j = 0; j < i; j++)                                     \
1915       *charbuf++ = components[j];                               \
1916   } while (0)
1917
1918
1919 #define DECODE_EMACS_MULE_20_RULEBASE_COMPOSITION(c)            \
1920   do {                                                          \
1921     /* Emacs 20 style format for rule-base composition.  */     \
1922     /* Store multibyte form of characters to be composed.  */   \
1923     enum composition_method method = COMPOSITION_WITH_RULE;     \
1924     int components[MAX_COMPOSITION_COMPONENTS * 2 - 1];         \
1925     int *buf = components;                                      \
1926     int i, j;                                                   \
1927     int from, to;                                               \
1928                                                                 \
1929     DECODE_EMACS_MULE_COMPOSITION_CHAR (buf);                   \
1930     for (i = 0; i < MAX_COMPOSITION_COMPONENTS; i++)            \
1931       {                                                         \
1932         DECODE_EMACS_MULE_COMPOSITION_RULE_20 (buf);            \
1933         DECODE_EMACS_MULE_COMPOSITION_CHAR (buf);               \
1934       }                                                         \
1935     if (i < 1 || (buf - components) % 2 == 0)                   \
1936       goto invalid_code;                                        \
1937     if (charbuf + i + (i / 2) + 1 < charbuf_end)                \
1938       goto no_more_source;                                      \
1939     from = coding->produced_char + char_offset;                 \
1940     to = from + i;                                              \
1941     ADD_COMPOSITION_DATA (buf, from, to, method);               \
1942     for (j = 0; j < i; j++)                                     \
1943       *charbuf++ = components[j];                               \
1944     for (j = 0; j < i; j += 2)                                  \
1945       *charbuf++ = components[j];                               \
1946   } while (0)
1947
1948
1949 static void
1950 decode_coding_emacs_mule (coding)
1951      struct coding_system *coding;
1952 {
1953   unsigned char *src = coding->source + coding->consumed;
1954   unsigned char *src_end = coding->source + coding->src_bytes;
1955   unsigned char *src_base;
1956   int *charbuf = coding->charbuf;
1957   int *charbuf_end = charbuf + coding->charbuf_size - MAX_ANNOTATION_LENGTH;
1958   int consumed_chars = 0, consumed_chars_base;
1959   int multibytep = coding->src_multibyte;
1960   Lisp_Object attrs, eol_type, charset_list;
1961   int char_offset = coding->produced_char;
1962   int last_offset = char_offset;
1963   int last_id = charset_ascii;
1964
1965   CODING_GET_INFO (coding, attrs, eol_type, charset_list);
1966
1967   while (1)
1968     {
1969       int c;
1970
1971       src_base = src;
1972       consumed_chars_base = consumed_chars;
1973
1974       if (charbuf >= charbuf_end)
1975         break;
1976
1977       ONE_MORE_BYTE (c);
1978
1979       if (c < 0x80)
1980         {
1981           if (c == '\r')
1982             {
1983               if (EQ (eol_type, Qdos))
1984                 {
1985                   if (src == src_end)
1986                     {
1987                       coding->result = CODING_RESULT_INSUFFICIENT_SRC;
1988                       goto no_more_source;
1989                     }
1990                   if (*src == '\n')
1991                     ONE_MORE_BYTE (c);
1992                 }
1993               else if (EQ (eol_type, Qmac))
1994                 c = '\n';
1995             }
1996           *charbuf++ = c;
1997           char_offset++;
1998         }
1999       else if (c == 0x80)
2000         {
2001           ONE_MORE_BYTE (c);
2002           if (c - 0xF2 >= COMPOSITION_RELATIVE
2003               && c - 0xF2 <= COMPOSITION_WITH_RULE_ALTCHARS)
2004             DECODE_EMACS_MULE_21_COMPOSITION (c);
2005           else if (c < 0xC0)
2006             DECODE_EMACS_MULE_20_RELATIVE_COMPOSITION (c);
2007           else if (c == 0xFF)
2008             DECODE_EMACS_MULE_20_RULEBASE_COMPOSITION (c);
2009           else
2010             goto invalid_code;
2011         }
2012       else if (c < 0xA0 && emacs_mule_bytes[c] > 1)
2013         {
2014           int nbytes, nchars;
2015           int id;
2016
2017           src = src_base;
2018           consumed_chars = consumed_chars_base;
2019           c = emacs_mule_char (coding, src, &nbytes, &nchars, &id);
2020           if (c < 0)
2021             {
2022               if (c == -2)
2023                 break;
2024               goto invalid_code;
2025             }
2026           if (last_id != id)
2027             {
2028               if (last_id != charset_ascii)
2029                 ADD_CHARSET_DATA (charbuf, last_offset, char_offset, last_id);
2030               last_id = id;
2031               last_offset = char_offset;
2032             }
2033           *charbuf++ = c;
2034           src += nbytes;
2035           consumed_chars += nchars;
2036           char_offset++;
2037         }
2038       continue;
2039
2040     invalid_code:
2041       src = src_base;
2042       consumed_chars = consumed_chars_base;
2043       ONE_MORE_BYTE (c);
2044       *charbuf++ = ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
2045       char_offset++;
2046       coding->errors++;
2047     }
2048
2049  no_more_source:
2050   if (last_id != charset_ascii)
2051     ADD_CHARSET_DATA (charbuf, last_offset, char_offset, last_id);
2052   coding->consumed_char += consumed_chars_base;
2053   coding->consumed = src_base - coding->source;
2054   coding->charbuf_used = charbuf - coding->charbuf;
2055 }
2056
2057
2058 #define EMACS_MULE_LEADING_CODES(id, codes)     \
2059   do {                                          \
2060     if (id < 0xA0)                              \
2061       codes[0] = id, codes[1] = 0;              \
2062     else if (id < 0xE0)                         \
2063       codes[0] = 0x9A, codes[1] = id;           \
2064     else if (id < 0xF0)                         \
2065       codes[0] = 0x9B, codes[1] = id;           \
2066     else if (id < 0xF5)                         \
2067       codes[0] = 0x9C, codes[1] = id;           \
2068     else                                        \
2069       codes[0] = 0x9D, codes[1] = id;           \
2070   } while (0);
2071
2072
2073 static int
2074 encode_coding_emacs_mule (coding)
2075      struct coding_system *coding;
2076 {
2077   int multibytep = coding->dst_multibyte;
2078   int *charbuf = coding->charbuf;
2079   int *charbuf_end = charbuf + coding->charbuf_used;
2080   unsigned char *dst = coding->destination + coding->produced;
2081   unsigned char *dst_end = coding->destination + coding->dst_bytes;
2082   int safe_room = 8;
2083   int produced_chars = 0;
2084   Lisp_Object attrs, eol_type, charset_list;
2085   int c;
2086   int preferred_charset_id = -1;
2087
2088   CODING_GET_INFO (coding, attrs, eol_type, charset_list);
2089
2090   while (charbuf < charbuf_end)
2091     {
2092       ASSURE_DESTINATION (safe_room);
2093       c = *charbuf++;
2094
2095       if (c < 0)
2096         {
2097           /* Handle an annotation.  */
2098           switch (*charbuf)
2099             {
2100             case CODING_ANNOTATE_COMPOSITION_MASK:
2101               /* Not yet implemented.  */
2102               break;
2103             case CODING_ANNOTATE_CHARSET_MASK:
2104               preferred_charset_id = charbuf[3];
2105               if (preferred_charset_id >= 0
2106                   && NILP (Fmemq (make_number (preferred_charset_id),
2107                                   charset_list)))
2108                 preferred_charset_id = -1;
2109               break;
2110             default:
2111               abort ();
2112             }
2113           charbuf += -c - 1;
2114           continue;
2115         }
2116
2117       if (ASCII_CHAR_P (c))
2118         EMIT_ONE_ASCII_BYTE (c);
2119       else if (CHAR_BYTE8_P (c))
2120         {
2121           c = CHAR_TO_BYTE8 (c);
2122           EMIT_ONE_BYTE (c);
2123         }
2124       else
2125         {
2126           struct charset *charset;
2127           unsigned code;
2128           int dimension;
2129           int emacs_mule_id;
2130           unsigned char leading_codes[2];
2131
2132           if (preferred_charset_id >= 0)
2133             {
2134               charset = CHARSET_FROM_ID (preferred_charset_id);
2135               if (! CHAR_CHARSET_P (c, charset))
2136                 charset = char_charset (c, charset_list, NULL);
2137             }
2138           else
2139             charset = char_charset (c, charset_list, &code);
2140           if (! charset)
2141             {
2142               c = coding->default_char;
2143               if (ASCII_CHAR_P (c))
2144                 {
2145                   EMIT_ONE_ASCII_BYTE (c);
2146                   continue;
2147                 }
2148               charset = char_charset (c, charset_list, &code);
2149             }
2150           dimension = CHARSET_DIMENSION (charset);
2151           emacs_mule_id = CHARSET_EMACS_MULE_ID (charset);
2152           EMACS_MULE_LEADING_CODES (emacs_mule_id, leading_codes);
2153           EMIT_ONE_BYTE (leading_codes[0]);
2154           if (leading_codes[1])
2155             EMIT_ONE_BYTE (leading_codes[1]);
2156           if (dimension == 1)
2157             EMIT_ONE_BYTE (code);
2158           else
2159             {
2160               EMIT_ONE_BYTE (code >> 8);
2161               EMIT_ONE_BYTE (code & 0xFF);
2162             }
2163         }
2164     }
2165   coding->result = CODING_RESULT_SUCCESS;
2166   coding->produced_char += produced_chars;
2167   coding->produced = dst - coding->destination;
2168   return 0;
2169 }
2170
2171 \f
2172 /*** 7. ISO2022 handlers ***/
2173
2174 /* The following note describes the coding system ISO2022 briefly.
2175    Since the intention of this note is to help understand the
2176    functions in this file, some parts are NOT ACCURATE or are OVERLY
2177    SIMPLIFIED.  For thorough understanding, please refer to the
2178    original document of ISO2022.  This is equivalent to the standard
2179    ECMA-35, obtainable from <URL:http://www.ecma.ch/> (*).
2180
2181    ISO2022 provides many mechanisms to encode several character sets
2182    in 7-bit and 8-bit environments.  For 7-bit environments, all text
2183    is encoded using bytes less than 128.  This may make the encoded
2184    text a little bit longer, but the text passes more easily through
2185    several types of gateway, some of which strip off the MSB (Most
2186    Significant Bit).
2187
2188    There are two kinds of character sets: control character sets and
2189    graphic character sets.  The former contain control characters such
2190    as `newline' and `escape' to provide control functions (control
2191    functions are also provided by escape sequences).  The latter
2192    contain graphic characters such as 'A' and '-'.  Emacs recognizes
2193    two control character sets and many graphic character sets.
2194
2195    Graphic character sets are classified into one of the following
2196    four classes, according to the number of bytes (DIMENSION) and
2197    number of characters in one dimension (CHARS) of the set:
2198    - DIMENSION1_CHARS94
2199    - DIMENSION1_CHARS96
2200    - DIMENSION2_CHARS94
2201    - DIMENSION2_CHARS96
2202
2203    In addition, each character set is assigned an identification tag,
2204    unique for each set, called the "final character" (denoted as <F>
2205    hereafter).  The <F> of each character set is decided by ECMA(*)
2206    when it is registered in ISO.  The code range of <F> is 0x30..0x7F
2207    (0x30..0x3F are for private use only).
2208
2209    Note (*): ECMA = European Computer Manufacturers Association
2210
2211    Here are examples of graphic character sets [NAME(<F>)]:
2212         o DIMENSION1_CHARS94 -- ASCII('B'), right-half-of-JISX0201('I'), ...
2213         o DIMENSION1_CHARS96 -- right-half-of-ISO8859-1('A'), ...
2214         o DIMENSION2_CHARS94 -- GB2312('A'), JISX0208('B'), ...
2215         o DIMENSION2_CHARS96 -- none for the moment
2216
2217    A code area (1 byte=8 bits) is divided into 4 areas, C0, GL, C1, and GR.
2218         C0 [0x00..0x1F] -- control character plane 0
2219         GL [0x20..0x7F] -- graphic character plane 0
2220         C1 [0x80..0x9F] -- control character plane 1
2221         GR [0xA0..0xFF] -- graphic character plane 1
2222
2223    A control character set is directly designated and invoked to C0 or
2224    C1 by an escape sequence.  The most common case is that:
2225    - ISO646's  control character set is designated/invoked to C0, and
2226    - ISO6429's control character set is designated/invoked to C1,
2227    and usually these designations/invocations are omitted in encoded
2228    text.  In a 7-bit environment, only C0 can be used, and a control
2229    character for C1 is encoded by an appropriate escape sequence to
2230    fit into the environment.  All control characters for C1 are
2231    defined to have corresponding escape sequences.
2232
2233    A graphic character set is at first designated to one of four
2234    graphic registers (G0 through G3), then these graphic registers are
2235    invoked to GL or GR.  These designations and invocations can be
2236    done independently.  The most common case is that G0 is invoked to
2237    GL, G1 is invoked to GR, and ASCII is designated to G0.  Usually
2238    these invocations and designations are omitted in encoded text.
2239    In a 7-bit environment, only GL can be used.
2240
2241    When a graphic character set of CHARS94 is invoked to GL, codes
2242    0x20 and 0x7F of the GL area work as control characters SPACE and
2243    DEL respectively, and codes 0xA0 and 0xFF of the GR area should not
2244    be used.
2245
2246    There are two ways of invocation: locking-shift and single-shift.
2247    With locking-shift, the invocation lasts until the next different
2248    invocation, whereas with single-shift, the invocation affects the
2249    following character only and doesn't affect the locking-shift
2250    state.  Invocations are done by the following control characters or
2251    escape sequences:
2252
2253    ----------------------------------------------------------------------
2254    abbrev  function                  cntrl escape seq   description
2255    ----------------------------------------------------------------------
2256    SI/LS0  (shift-in)                0x0F  none         invoke G0 into GL
2257    SO/LS1  (shift-out)               0x0E  none         invoke G1 into GL
2258    LS2     (locking-shift-2)         none  ESC 'n'      invoke G2 into GL
2259    LS3     (locking-shift-3)         none  ESC 'o'      invoke G3 into GL
2260    LS1R    (locking-shift-1 right)   none  ESC '~'      invoke G1 into GR (*)
2261    LS2R    (locking-shift-2 right)   none  ESC '}'      invoke G2 into GR (*)
2262    LS3R    (locking-shift 3 right)   none  ESC '|'      invoke G3 into GR (*)
2263    SS2     (single-shift-2)          0x8E  ESC 'N'      invoke G2 for one char
2264    SS3     (single-shift-3)          0x8F  ESC 'O'      invoke G3 for one char
2265    ----------------------------------------------------------------------
2266    (*) These are not used by any known coding system.
2267
2268    Control characters for these functions are defined by macros
2269    ISO_CODE_XXX in `coding.h'.
2270
2271    Designations are done by the following escape sequences:
2272    ----------------------------------------------------------------------
2273    escape sequence      description
2274    ----------------------------------------------------------------------
2275    ESC '(' <F>          designate DIMENSION1_CHARS94<F> to G0
2276    ESC ')' <F>          designate DIMENSION1_CHARS94<F> to G1
2277    ESC '*' <F>          designate DIMENSION1_CHARS94<F> to G2
2278    ESC '+' <F>          designate DIMENSION1_CHARS94<F> to G3
2279    ESC ',' <F>          designate DIMENSION1_CHARS96<F> to G0 (*)
2280    ESC '-' <F>          designate DIMENSION1_CHARS96<F> to G1
2281    ESC '.' <F>          designate DIMENSION1_CHARS96<F> to G2
2282    ESC '/' <F>          designate DIMENSION1_CHARS96<F> to G3
2283    ESC '$' '(' <F>      designate DIMENSION2_CHARS94<F> to G0 (**)
2284    ESC '$' ')' <F>      designate DIMENSION2_CHARS94<F> to G1
2285    ESC '$' '*' <F>      designate DIMENSION2_CHARS94<F> to G2
2286    ESC '$' '+' <F>      designate DIMENSION2_CHARS94<F> to G3
2287    ESC '$' ',' <F>      designate DIMENSION2_CHARS96<F> to G0 (*)
2288    ESC '$' '-' <F>      designate DIMENSION2_CHARS96<F> to G1
2289    ESC '$' '.' <F>      designate DIMENSION2_CHARS96<F> to G2
2290    ESC '$' '/' <F>      designate DIMENSION2_CHARS96<F> to G3
2291    ----------------------------------------------------------------------
2292
2293    In this list, "DIMENSION1_CHARS94<F>" means a graphic character set
2294    of dimension 1, chars 94, and final character <F>, etc...
2295
2296    Note (*): Although these designations are not allowed in ISO2022,
2297    Emacs accepts them on decoding, and produces them on encoding
2298    CHARS96 character sets in a coding system which is characterized as
2299    7-bit environment, non-locking-shift, and non-single-shift.
2300
2301    Note (**): If <F> is '@', 'A', or 'B', the intermediate character
2302    '(' must be omitted.  We refer to this as "short-form" hereafter.
2303
2304    Now you may notice that there are a lot of ways of encoding the
2305    same multilingual text in ISO2022.  Actually, there exist many
2306    coding systems such as Compound Text (used in X11's inter client
2307    communication, ISO-2022-JP (used in Japanese Internet), ISO-2022-KR
2308    (used in Korean Internet), EUC (Extended UNIX Code, used in Asian
2309    localized platforms), and all of these are variants of ISO2022.
2310
2311    In addition to the above, Emacs handles two more kinds of escape
2312    sequences: ISO6429's direction specification and Emacs' private
2313    sequence for specifying character composition.
2314
2315    ISO6429's direction specification takes the following form:
2316         o CSI ']'      -- end of the current direction
2317         o CSI '0' ']'  -- end of the current direction
2318         o CSI '1' ']'  -- start of left-to-right text
2319         o CSI '2' ']'  -- start of right-to-left text
2320    The control character CSI (0x9B: control sequence introducer) is
2321    abbreviated to the escape sequence ESC '[' in a 7-bit environment.
2322
2323    Character composition specification takes the following form:
2324         o ESC '0' -- start relative composition
2325         o ESC '1' -- end composition
2326         o ESC '2' -- start rule-base composition (*)
2327         o ESC '3' -- start relative composition with alternate chars  (**)
2328         o ESC '4' -- start rule-base composition with alternate chars  (**)
2329   Since these are not standard escape sequences of any ISO standard,
2330   the use of them with these meanings is restricted to Emacs only.
2331
2332   (*) This form is used only in Emacs 20.7 and older versions,
2333   but newer versions can safely decode it.
2334   (**) This form is used only in Emacs 21.1 and newer versions,
2335   and older versions can't decode it.
2336
2337   Here's a list of example usages of these composition escape
2338   sequences (categorized by `enum composition_method').
2339
2340   COMPOSITION_RELATIVE:
2341         ESC 0 CHAR [ CHAR ] ESC 1
2342   COMPOSITION_WITH_RULE:
2343         ESC 2 CHAR [ RULE CHAR ] ESC 1
2344   COMPOSITION_WITH_ALTCHARS:
2345         ESC 3 ALTCHAR [ ALTCHAR ] ESC 0 CHAR [ CHAR ] ESC 1
2346   COMPOSITION_WITH_RULE_ALTCHARS:
2347         ESC 4 ALTCHAR [ RULE ALTCHAR ] ESC 0 CHAR [ CHAR ] ESC 1 */
2348
2349 enum iso_code_class_type iso_code_class[256];
2350
2351 #define SAFE_CHARSET_P(coding, id)      \
2352   ((id) <= (coding)->max_charset_id     \
2353    && (coding)->safe_charsets[id] >= 0)
2354
2355
2356 #define SHIFT_OUT_OK(category)  \
2357   (CODING_ISO_INITIAL (&coding_categories[category], 1) >= 0)
2358
2359 static void
2360 setup_iso_safe_charsets (attrs)
2361      Lisp_Object attrs;
2362 {
2363   Lisp_Object charset_list, safe_charsets;
2364   Lisp_Object request;
2365   Lisp_Object reg_usage;
2366   Lisp_Object tail;
2367   int reg94, reg96;
2368   int flags = XINT (AREF (attrs, coding_attr_iso_flags));
2369   int max_charset_id;
2370
2371   charset_list = CODING_ATTR_CHARSET_LIST (attrs);
2372   if ((flags & CODING_ISO_FLAG_FULL_SUPPORT)
2373       && ! EQ (charset_list, Viso_2022_charset_list))
2374     {
2375       CODING_ATTR_CHARSET_LIST (attrs)
2376         = charset_list = Viso_2022_charset_list;
2377       ASET (attrs, coding_attr_safe_charsets, Qnil);
2378     }
2379
2380   if (STRINGP (AREF (attrs, coding_attr_safe_charsets)))
2381     return;
2382
2383   max_charset_id = 0;
2384   for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
2385     {
2386       int id = XINT (XCAR (tail));
2387       if (max_charset_id < id)
2388         max_charset_id = id;
2389     }
2390
2391   safe_charsets = Fmake_string (make_number (max_charset_id + 1),
2392                                 make_number (255));
2393   request = AREF (attrs, coding_attr_iso_request);
2394   reg_usage = AREF (attrs, coding_attr_iso_usage);
2395   reg94 = XINT (XCAR (reg_usage));
2396   reg96 = XINT (XCDR (reg_usage));
2397
2398   for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
2399     {
2400       Lisp_Object id;
2401       Lisp_Object reg;
2402       struct charset *charset;
2403
2404       id = XCAR (tail);
2405       charset = CHARSET_FROM_ID (XINT (id));
2406       reg = Fcdr (Fassq (id, request));
2407       if (! NILP (reg))
2408         XSTRING (safe_charsets)->data[XINT (id)] = XINT (reg);
2409       else if (charset->iso_chars_96)
2410         {
2411           if (reg96 < 4)
2412             XSTRING (safe_charsets)->data[XINT (id)] = reg96;
2413         }
2414       else
2415         {
2416           if (reg94 < 4)
2417             XSTRING (safe_charsets)->data[XINT (id)] = reg94;
2418         }
2419     }
2420   ASET (attrs, coding_attr_safe_charsets, safe_charsets);
2421 }
2422
2423
2424 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
2425    Check if a text is encoded in one of ISO-2022 based codig systems.
2426    If it is, return 1, else return 0.  */
2427
2428 static int
2429 detect_coding_iso_2022 (coding, detect_info)
2430      struct coding_system *coding;
2431      struct coding_detection_info *detect_info;
2432 {
2433   unsigned char *src = coding->source, *src_base = src;
2434   unsigned char *src_end = coding->source + coding->src_bytes;
2435   int multibytep = coding->src_multibyte;
2436   int single_shifting = 0;
2437   int id;
2438   int c, c1;
2439   int consumed_chars = 0;
2440   int i;
2441   int rejected = 0;
2442   int found = 0;
2443
2444   detect_info->checked |= CATEGORY_MASK_ISO;
2445
2446   for (i = coding_category_iso_7; i <= coding_category_iso_8_else; i++)
2447     {
2448       struct coding_system *this = &(coding_categories[i]);
2449       Lisp_Object attrs, val;
2450
2451       attrs = CODING_ID_ATTRS (this->id);
2452       if (CODING_ISO_FLAGS (this) & CODING_ISO_FLAG_FULL_SUPPORT
2453           && ! EQ (CODING_ATTR_SAFE_CHARSETS (attrs), Viso_2022_charset_list))
2454         setup_iso_safe_charsets (attrs);
2455       val = CODING_ATTR_SAFE_CHARSETS (attrs);
2456       this->max_charset_id = XSTRING (val)->size - 1;
2457       this->safe_charsets = (char *) XSTRING (val)->data;
2458     }
2459
2460   /* A coding system of this category is always ASCII compatible.  */
2461   src += coding->head_ascii;
2462
2463   while (rejected != CATEGORY_MASK_ISO)
2464     {
2465       ONE_MORE_BYTE (c);
2466       switch (c)
2467         {
2468         case ISO_CODE_ESC:
2469           if (inhibit_iso_escape_detection)
2470             break;
2471           single_shifting = 0;
2472           ONE_MORE_BYTE (c);
2473           if (c >= '(' && c <= '/')
2474             {
2475               /* Designation sequence for a charset of dimension 1.  */
2476               ONE_MORE_BYTE (c1);
2477               if (c1 < ' ' || c1 >= 0x80
2478                   || (id = iso_charset_table[0][c >= ','][c1]) < 0)
2479                 /* Invalid designation sequence.  Just ignore.  */
2480                 break;
2481             }
2482           else if (c == '$')
2483             {
2484               /* Designation sequence for a charset of dimension 2.  */
2485               ONE_MORE_BYTE (c);
2486               if (c >= '@' && c <= 'B')
2487                 /* Designation for JISX0208.1978, GB2312, or JISX0208.  */
2488                 id = iso_charset_table[1][0][c];
2489               else if (c >= '(' && c <= '/')
2490                 {
2491                   ONE_MORE_BYTE (c1);
2492                   if (c1 < ' ' || c1 >= 0x80
2493                       || (id = iso_charset_table[1][c >= ','][c1]) < 0)
2494                     /* Invalid designation sequence.  Just ignore.  */
2495                     break;
2496                 }
2497               else
2498                 /* Invalid designation sequence.  Just ignore it.  */
2499                 break;
2500             }
2501           else if (c == 'N' || c == 'O')
2502             {
2503               /* ESC <Fe> for SS2 or SS3.  */
2504               single_shifting = 1;
2505               rejected |= CATEGORY_MASK_ISO_7BIT | CATEGORY_MASK_ISO_8BIT;
2506               break;
2507             }
2508           else if (c >= '0' && c <= '4')
2509             {
2510               /* ESC <Fp> for start/end composition.  */
2511               found |= CATEGORY_MASK_ISO;
2512               break;
2513             }
2514           else
2515             {
2516               /* Invalid escape sequence.  Just ignore it.  */
2517               break;
2518             }
2519
2520           /* We found a valid designation sequence for CHARSET.  */
2521           rejected |= CATEGORY_MASK_ISO_8BIT;
2522           if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_7],
2523                               id))
2524             found |= CATEGORY_MASK_ISO_7;
2525           else
2526             rejected |= CATEGORY_MASK_ISO_7;
2527           if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_7_tight],
2528                               id))
2529             found |= CATEGORY_MASK_ISO_7_TIGHT;
2530           else
2531             rejected |= CATEGORY_MASK_ISO_7_TIGHT;
2532           if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_7_else],
2533                               id))
2534             found |= CATEGORY_MASK_ISO_7_ELSE;
2535           else
2536             rejected |= CATEGORY_MASK_ISO_7_ELSE;
2537           if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_8_else],
2538                               id))
2539             found |= CATEGORY_MASK_ISO_8_ELSE;
2540           else
2541             rejected |= CATEGORY_MASK_ISO_8_ELSE;
2542           break;
2543
2544         case ISO_CODE_SO:
2545         case ISO_CODE_SI:
2546           /* Locking shift out/in.  */
2547           if (inhibit_iso_escape_detection)
2548             break;
2549           single_shifting = 0;
2550           rejected |= CATEGORY_MASK_ISO_7BIT | CATEGORY_MASK_ISO_8BIT;
2551           found |= CATEGORY_MASK_ISO_ELSE;
2552           break;
2553
2554         case ISO_CODE_CSI:
2555           /* Control sequence introducer.  */
2556           single_shifting = 0;
2557           rejected |= CATEGORY_MASK_ISO_7BIT | CATEGORY_MASK_ISO_7_ELSE;
2558           found |= CATEGORY_MASK_ISO_8_ELSE;
2559           goto check_extra_latin;
2560
2561
2562         case ISO_CODE_SS2:
2563         case ISO_CODE_SS3:
2564           /* Single shift.   */
2565           if (inhibit_iso_escape_detection)
2566             break;
2567           single_shifting = 1;
2568           rejected |= CATEGORY_MASK_ISO_7BIT;
2569           if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_1])
2570               & CODING_ISO_FLAG_SINGLE_SHIFT)
2571             found |= CATEGORY_MASK_ISO_8_1;
2572           if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_2])
2573               & CODING_ISO_FLAG_SINGLE_SHIFT)
2574             found |= CATEGORY_MASK_ISO_8_2;
2575           goto check_extra_latin;
2576
2577         default:
2578           if (c < 0x80)
2579             {
2580               single_shifting = 0;
2581               break;
2582             }
2583           if (c >= 0xA0)
2584             {
2585               rejected |= CATEGORY_MASK_ISO_7BIT | CATEGORY_MASK_ISO_7_ELSE;
2586               found |= CATEGORY_MASK_ISO_8_1;
2587               /* Check the length of succeeding codes of the range
2588                  0xA0..0FF.  If the byte length is even, we include
2589                  CATEGORY_MASK_ISO_8_2 in `found'.  We can check this
2590                  only when we are not single shifting.  */
2591               if (! single_shifting
2592                   && ! (rejected & CATEGORY_MASK_ISO_8_2))
2593                 {
2594                   int i = 1;
2595                   while (src < src_end)
2596                     {
2597                       ONE_MORE_BYTE (c);
2598                       if (c < 0xA0)
2599                         break;
2600                       i++;
2601                     }
2602
2603                   if (i & 1 && src < src_end)
2604                     rejected |= CATEGORY_MASK_ISO_8_2;
2605                   else
2606                     found |= CATEGORY_MASK_ISO_8_2;
2607                 }
2608               break;
2609             }
2610         check_extra_latin:
2611           single_shifting = 0;
2612           if (! VECTORP (Vlatin_extra_code_table)
2613               || NILP (XVECTOR (Vlatin_extra_code_table)->contents[c]))
2614             {
2615               rejected = CATEGORY_MASK_ISO;
2616               break;
2617             }
2618           if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_1])
2619               & CODING_ISO_FLAG_LATIN_EXTRA)
2620             found |= CATEGORY_MASK_ISO_8_1;
2621           else
2622             rejected |= CATEGORY_MASK_ISO_8_1;
2623           if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_2])
2624               & CODING_ISO_FLAG_LATIN_EXTRA)
2625             found |= CATEGORY_MASK_ISO_8_2;
2626           else
2627             rejected |= CATEGORY_MASK_ISO_8_2;
2628         }
2629     }
2630   detect_info->rejected |= CATEGORY_MASK_ISO;
2631   return 0;
2632
2633  no_more_source:
2634   detect_info->rejected |= rejected;
2635   detect_info->found |= (found & ~rejected);
2636   return 1;
2637 }
2638
2639
2640 /* Set designation state into CODING.  */
2641 #define DECODE_DESIGNATION(reg, dim, chars_96, final)                   \
2642   do {                                                                  \
2643     int id, prev;                                                       \
2644                                                                         \
2645     if (final < '0' || final >= 128                                     \
2646         || ((id = ISO_CHARSET_TABLE (dim, chars_96, final)) < 0)        \
2647         || !SAFE_CHARSET_P (coding, id))                                \
2648       {                                                                 \
2649         CODING_ISO_DESIGNATION (coding, reg) = -2;                      \
2650         goto invalid_code;                                              \
2651       }                                                                 \
2652     prev = CODING_ISO_DESIGNATION (coding, reg);                        \
2653     if (id == charset_jisx0201_roman)                                   \
2654       {                                                                 \
2655         if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_USE_ROMAN)      \
2656           id = charset_ascii;                                           \
2657       }                                                                 \
2658     else if (id == charset_jisx0208_1978)                               \
2659       {                                                                 \
2660         if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_USE_OLDJIS)     \
2661           id = charset_jisx0208;                                        \
2662       }                                                                 \
2663     CODING_ISO_DESIGNATION (coding, reg) = id;                          \
2664     /* If there was an invalid designation to REG previously, and this  \
2665        designation is ASCII to REG, we should keep this designation     \
2666        sequence.  */                                                    \
2667     if (prev == -2 && id == charset_ascii)                              \
2668       goto invalid_code;                                                \
2669   } while (0)
2670
2671
2672 #define MAYBE_FINISH_COMPOSITION()                              \
2673   do {                                                          \
2674     int i;                                                      \
2675     if (composition_state == COMPOSING_NO)                      \
2676       break;                                                    \
2677     /* It is assured that we have enough room for producing     \
2678        characters stored in the table `components'.  */         \
2679     if (charbuf + component_idx > charbuf_end)                  \
2680       goto no_more_source;                                      \
2681     composition_state = COMPOSING_NO;                           \
2682     if (method == COMPOSITION_RELATIVE                          \
2683         || method == COMPOSITION_WITH_ALTCHARS)                 \
2684       {                                                         \
2685         for (i = 0; i < component_idx; i++)                     \
2686           *charbuf++ = components[i];                           \
2687         char_offset += component_idx;                           \
2688       }                                                         \
2689     else                                                        \
2690       {                                                         \
2691         for (i = 0; i < component_idx; i += 2)                  \
2692           *charbuf++ = components[i];                           \
2693         char_offset += (component_idx / 2) + 1;                 \
2694       }                                                         \
2695   } while (0)
2696
2697
2698 /* Handle composition start sequence ESC 0, ESC 2, ESC 3, or ESC 4.
2699    ESC 0 : relative composition : ESC 0 CHAR ... ESC 1
2700    ESC 2 : rulebase composition : ESC 2 CHAR RULE CHAR RULE ... CHAR ESC 1
2701    ESC 3 : altchar composition :  ESC 3 CHAR ... ESC 0 CHAR ... ESC 1
2702    ESC 4 : alt&rule composition : ESC 4 CHAR RULE ... CHAR ESC 0 CHAR ... ESC 1
2703   */
2704
2705 #define DECODE_COMPOSITION_START(c1)                                    \
2706   do {                                                                  \
2707     if (c1 == '0'                                                       \
2708         && composition_state == COMPOSING_COMPONENT_RULE)               \
2709       {                                                                 \
2710         component_len = component_idx;                                  \
2711         composition_state = COMPOSING_CHAR;                             \
2712       }                                                                 \
2713     else                                                                \
2714       {                                                                 \
2715         unsigned char *p;                                               \
2716                                                                         \
2717         MAYBE_FINISH_COMPOSITION ();                                    \
2718         if (charbuf + MAX_COMPOSITION_COMPONENTS > charbuf_end)         \
2719           goto no_more_source;                                          \
2720         for (p = src; p < src_end - 1; p++)                             \
2721           if (*p == ISO_CODE_ESC && p[1] == '1')                        \
2722             break;                                                      \
2723         if (p == src_end - 1)                                           \
2724           {                                                             \
2725             if (coding->mode & CODING_MODE_LAST_BLOCK)                  \
2726               goto invalid_code;                                        \
2727             goto no_more_source;                                        \
2728           }                                                             \
2729                                                                         \
2730         /* This is surely the start of a composition.  */               \
2731         method = (c1 == '0' ? COMPOSITION_RELATIVE                      \
2732                   : c1 == '2' ? COMPOSITION_WITH_RULE                   \
2733                   : c1 == '3' ? COMPOSITION_WITH_ALTCHARS               \
2734                   : COMPOSITION_WITH_RULE_ALTCHARS);                    \
2735         composition_state = (c1 <= '2' ? COMPOSING_CHAR                 \
2736                              : COMPOSING_COMPONENT_CHAR);               \
2737         component_idx = component_len = 0;                              \
2738       }                                                                 \
2739   } while (0)
2740
2741
2742 /* Handle compositoin end sequence ESC 1.  */
2743
2744 #define DECODE_COMPOSITION_END()                                        \
2745   do {                                                                  \
2746     int nchars = (component_len > 0 ? component_idx - component_len     \
2747                   : method == COMPOSITION_RELATIVE ? component_idx      \
2748                   : (component_idx + 1) / 2);                           \
2749     int i;                                                              \
2750     int *saved_charbuf = charbuf;                                       \
2751     int from = coding->produced_char + char_offset;                     \
2752     int to = from + nchars;                                             \
2753                                                                         \
2754     ADD_COMPOSITION_DATA (charbuf, from, to, method);                   \
2755     if (method != COMPOSITION_RELATIVE)                                 \
2756       {                                                                 \
2757         if (component_len == 0)                                         \
2758           for (i = 0; i < component_idx; i++)                           \
2759             *charbuf++ = components[i];                                 \
2760         else                                                            \
2761           for (i = 0; i < component_len; i++)                           \
2762             *charbuf++ = components[i];                                 \
2763         *saved_charbuf = saved_charbuf - charbuf;                       \
2764       }                                                                 \
2765     if (method == COMPOSITION_WITH_RULE)                                \
2766       for (i = 0; i < component_idx; i += 2, char_offset++)             \
2767         *charbuf++ = components[i];                                     \
2768     else                                                                \
2769       for (i = component_len; i < component_idx; i++, char_offset++)    \
2770         *charbuf++ = components[i];                                     \
2771     coding->annotated = 1;                                              \
2772     composition_state = COMPOSING_NO;                                   \
2773   } while (0)
2774
2775
2776 /* Decode a composition rule from the byte C1 (and maybe one more byte
2777    from SRC) and store one encoded composition rule in
2778    coding->cmp_data.  */
2779
2780 #define DECODE_COMPOSITION_RULE(c1)                                     \
2781   do {                                                                  \
2782     (c1) -= 32;                                                         \
2783     if (c1 < 81)                /* old format (before ver.21) */        \
2784       {                                                                 \
2785         int gref = (c1) / 9;                                            \
2786         int nref = (c1) % 9;                                            \
2787         if (gref == 4) gref = 10;                                       \
2788         if (nref == 4) nref = 10;                                       \
2789         c1 = COMPOSITION_ENCODE_RULE (gref, nref);                      \
2790       }                                                                 \
2791     else if (c1 < 93)           /* new format (after ver.21) */         \
2792       {                                                                 \
2793         ONE_MORE_BYTE (c2);                                             \
2794         c1 = COMPOSITION_ENCODE_RULE (c1 - 81, c2 - 32);                \
2795       }                                                                 \
2796     else                                                                \
2797       c1 = 0;                                                           \
2798   } while (0)
2799
2800
2801 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".  */
2802
2803 static void
2804 decode_coding_iso_2022 (coding)
2805      struct coding_system *coding;
2806 {
2807   unsigned char *src = coding->source + coding->consumed;
2808   unsigned char *src_end = coding->source + coding->src_bytes;
2809   unsigned char *src_base;
2810   int *charbuf = coding->charbuf;
2811   int *charbuf_end
2812     = charbuf + coding->charbuf_size - 4 - MAX_ANNOTATION_LENGTH;
2813   int consumed_chars = 0, consumed_chars_base;
2814   int multibytep = coding->src_multibyte;
2815   /* Charsets invoked to graphic plane 0 and 1 respectively.  */
2816   int charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
2817   int charset_id_1 = CODING_ISO_INVOKED_CHARSET (coding, 1);
2818   struct charset *charset;
2819   int c;
2820   /* For handling composition sequence.  */
2821 #define COMPOSING_NO                    0
2822 #define COMPOSING_CHAR                  1
2823 #define COMPOSING_RULE                  2
2824 #define COMPOSING_COMPONENT_CHAR        3
2825 #define COMPOSING_COMPONENT_RULE        4
2826
2827   int composition_state = COMPOSING_NO;
2828   enum composition_method method;
2829   int components[MAX_COMPOSITION_COMPONENTS * 2 + 1];
2830   int component_idx;
2831   int component_len;
2832   Lisp_Object attrs, eol_type, charset_list;
2833   int char_offset = coding->produced_char;
2834   int last_offset = char_offset;
2835   int last_id = charset_ascii;
2836
2837   CODING_GET_INFO (coding, attrs, eol_type, charset_list);
2838   setup_iso_safe_charsets (attrs);
2839
2840   while (1)
2841     {
2842       int c1, c2;
2843
2844       src_base = src;
2845       consumed_chars_base = consumed_chars;
2846
2847       if (charbuf >= charbuf_end)
2848         break;
2849
2850       ONE_MORE_BYTE (c1);
2851
2852       /* We produce at most one character.  */
2853       switch (iso_code_class [c1])
2854         {
2855         case ISO_0x20_or_0x7F:
2856           if (composition_state != COMPOSING_NO)
2857             {
2858               if (composition_state == COMPOSING_RULE
2859                   || composition_state == COMPOSING_COMPONENT_RULE)
2860                 {
2861                   DECODE_COMPOSITION_RULE (c1);
2862                   components[component_idx++] = c1;
2863                   composition_state--;
2864                   continue;
2865                 }
2866             }
2867           if (charset_id_0 < 0
2868               || ! CHARSET_ISO_CHARS_96 (CHARSET_FROM_ID (charset_id_0)))
2869             /* This is SPACE or DEL.  */
2870             charset = CHARSET_FROM_ID (charset_ascii);
2871           else
2872             charset = CHARSET_FROM_ID (charset_id_0);
2873           break;
2874
2875         case ISO_graphic_plane_0:
2876           if (composition_state != COMPOSING_NO)
2877             {
2878               if (composition_state == COMPOSING_RULE
2879                   || composition_state == COMPOSING_COMPONENT_RULE)
2880                 {
2881                   DECODE_COMPOSITION_RULE (c1);
2882                   components[component_idx++] = c1;
2883                   composition_state--;
2884                   continue;
2885                 }
2886             }
2887           charset = CHARSET_FROM_ID (charset_id_0);
2888           break;
2889
2890         case ISO_0xA0_or_0xFF:
2891           if (charset_id_1 < 0
2892               || ! CHARSET_ISO_CHARS_96 (CHARSET_FROM_ID (charset_id_1))
2893               || CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS)
2894             goto invalid_code;
2895           /* This is a graphic character, we fall down ... */
2896
2897         case ISO_graphic_plane_1:
2898           if (charset_id_1 < 0)
2899             goto invalid_code;
2900           charset = CHARSET_FROM_ID (charset_id_1);
2901           break;
2902
2903         case ISO_carriage_return:
2904           if (c1 == '\r')
2905             {
2906               if (EQ (eol_type, Qdos))
2907                 {
2908                   if (src == src_end)
2909                     {
2910                       coding->result = CODING_RESULT_INSUFFICIENT_SRC;
2911                       goto no_more_source;
2912                     }
2913                   if (*src == '\n')
2914                     ONE_MORE_BYTE (c1);
2915                 }
2916               else if (EQ (eol_type, Qmac))
2917                 c1 = '\n';
2918             }
2919           /* fall through */
2920
2921         case ISO_control_0:
2922           MAYBE_FINISH_COMPOSITION ();
2923           charset = CHARSET_FROM_ID (charset_ascii);
2924           break;
2925
2926         case ISO_control_1:
2927           MAYBE_FINISH_COMPOSITION ();
2928           goto invalid_code;
2929
2930         case ISO_shift_out:
2931           if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LOCKING_SHIFT)
2932               || CODING_ISO_DESIGNATION (coding, 1) < 0)
2933             goto invalid_code;
2934           CODING_ISO_INVOCATION (coding, 0) = 1;
2935           charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
2936           continue;
2937
2938         case ISO_shift_in:
2939           if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LOCKING_SHIFT))
2940             goto invalid_code;
2941           CODING_ISO_INVOCATION (coding, 0) = 0;
2942           charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
2943           continue;
2944
2945         case ISO_single_shift_2_7:
2946         case ISO_single_shift_2:
2947           if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT))
2948             goto invalid_code;
2949           /* SS2 is handled as an escape sequence of ESC 'N' */
2950           c1 = 'N';
2951           goto label_escape_sequence;
2952
2953         case ISO_single_shift_3:
2954           if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT))
2955             goto invalid_code;
2956           /* SS2 is handled as an escape sequence of ESC 'O' */
2957           c1 = 'O';
2958           goto label_escape_sequence;
2959
2960         case ISO_control_sequence_introducer:
2961           /* CSI is handled as an escape sequence of ESC '[' ...  */
2962           c1 = '[';
2963           goto label_escape_sequence;
2964
2965         case ISO_escape:
2966           ONE_MORE_BYTE (c1);
2967         label_escape_sequence:
2968           /* Escape sequences handled here are invocation,
2969              designation, direction specification, and character
2970              composition specification.  */
2971           switch (c1)
2972             {
2973             case '&':           /* revision of following character set */
2974               ONE_MORE_BYTE (c1);
2975               if (!(c1 >= '@' && c1 <= '~'))
2976                 goto invalid_code;
2977               ONE_MORE_BYTE (c1);
2978               if (c1 != ISO_CODE_ESC)
2979                 goto invalid_code;
2980               ONE_MORE_BYTE (c1);
2981               goto label_escape_sequence;
2982
2983             case '$':           /* designation of 2-byte character set */
2984               if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DESIGNATION))
2985                 goto invalid_code;
2986               ONE_MORE_BYTE (c1);
2987               if (c1 >= '@' && c1 <= 'B')
2988                 {       /* designation of JISX0208.1978, GB2312.1980,
2989                            or JISX0208.1980 */
2990                   DECODE_DESIGNATION (0, 2, 0, c1);
2991                 }
2992               else if (c1 >= 0x28 && c1 <= 0x2B)
2993                 {       /* designation of DIMENSION2_CHARS94 character set */
2994                   ONE_MORE_BYTE (c2);
2995                   DECODE_DESIGNATION (c1 - 0x28, 2, 0, c2);
2996                 }
2997               else if (c1 >= 0x2C && c1 <= 0x2F)
2998                 {       /* designation of DIMENSION2_CHARS96 character set */
2999                   ONE_MORE_BYTE (c2);
3000                   DECODE_DESIGNATION (c1 - 0x2C, 2, 1, c2);
3001                 }
3002               else
3003                 goto invalid_code;
3004               /* We must update these variables now.  */
3005               charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
3006               charset_id_1 = CODING_ISO_INVOKED_CHARSET (coding, 1);
3007               continue;
3008
3009             case 'n':           /* invocation of locking-shift-2 */
3010               if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LOCKING_SHIFT)
3011                   || CODING_ISO_DESIGNATION (coding, 2) < 0)
3012                 goto invalid_code;
3013               CODING_ISO_INVOCATION (coding, 0) = 2;
3014               charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
3015               continue;
3016
3017             case 'o':           /* invocation of locking-shift-3 */
3018               if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LOCKING_SHIFT)
3019                   || CODING_ISO_DESIGNATION (coding, 3) < 0)
3020                 goto invalid_code;
3021               CODING_ISO_INVOCATION (coding, 0) = 3;
3022               charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
3023               continue;
3024
3025             case 'N':           /* invocation of single-shift-2 */
3026               if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT)
3027                   || CODING_ISO_DESIGNATION (coding, 2) < 0)
3028                 goto invalid_code;
3029               charset = CHARSET_FROM_ID (CODING_ISO_DESIGNATION (coding, 2));
3030               ONE_MORE_BYTE (c1);
3031               if (c1 < 0x20 || (c1 >= 0x80 && c1 < 0xA0))
3032                 goto invalid_code;
3033               break;
3034
3035             case 'O':           /* invocation of single-shift-3 */
3036               if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT)
3037                   || CODING_ISO_DESIGNATION (coding, 3) < 0)
3038                 goto invalid_code;
3039               charset = CHARSET_FROM_ID (CODING_ISO_DESIGNATION (coding, 3));
3040               ONE_MORE_BYTE (c1);
3041               if (c1 < 0x20 || (c1 >= 0x80 && c1 < 0xA0))
3042                 goto invalid_code;
3043               break;
3044
3045             case '0': case '2': case '3': case '4': /* start composition */
3046               if (! (coding->common_flags & CODING_ANNOTATE_COMPOSITION_MASK))
3047                 goto invalid_code;
3048               DECODE_COMPOSITION_START (c1);
3049               continue;
3050
3051             case '1':           /* end composition */
3052               if (composition_state == COMPOSING_NO)
3053                 goto invalid_code;
3054               DECODE_COMPOSITION_END ();
3055               continue;
3056
3057             case '[':           /* specification of direction */
3058               if (! CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DIRECTION)
3059                 goto invalid_code;
3060               /* For the moment, nested direction is not supported.
3061                  So, `coding->mode & CODING_MODE_DIRECTION' zero means
3062                  left-to-right, and nozero means right-to-left.  */
3063               ONE_MORE_BYTE (c1);
3064               switch (c1)
3065                 {
3066                 case ']':       /* end of the current direction */
3067                   coding->mode &= ~CODING_MODE_DIRECTION;
3068
3069                 case '0':       /* end of the current direction */
3070                 case '1':       /* start of left-to-right direction */
3071                   ONE_MORE_BYTE (c1);
3072                   if (c1 == ']')
3073                     coding->mode &= ~CODING_MODE_DIRECTION;
3074                   else
3075                     goto invalid_code;
3076                   break;
3077
3078                 case '2':       /* start of right-to-left direction */
3079                   ONE_MORE_BYTE (c1);
3080                   if (c1 == ']')
3081                     coding->mode |= CODING_MODE_DIRECTION;
3082                   else
3083                     goto invalid_code;
3084                   break;
3085
3086                 default:
3087                   goto invalid_code;
3088                 }
3089               continue;
3090
3091             default:
3092               if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DESIGNATION))
3093                 goto invalid_code;
3094               if (c1 >= 0x28 && c1 <= 0x2B)
3095                 {       /* designation of DIMENSION1_CHARS94 character set */
3096                   ONE_MORE_BYTE (c2);
3097                   DECODE_DESIGNATION (c1 - 0x28, 1, 0, c2);
3098                 }
3099               else if (c1 >= 0x2C && c1 <= 0x2F)
3100                 {       /* designation of DIMENSION1_CHARS96 character set */
3101                   ONE_MORE_BYTE (c2);
3102                   DECODE_DESIGNATION (c1 - 0x2C, 1, 1, c2);
3103                 }
3104               else
3105                 goto invalid_code;
3106               /* We must update these variables now.  */
3107               charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
3108               charset_id_1 = CODING_ISO_INVOKED_CHARSET (coding, 1);
3109               continue;
3110             }
3111         }
3112
3113       if (charset->id != charset_ascii
3114           && last_id != charset->id)
3115         {
3116           if (last_id != charset_ascii)
3117             ADD_CHARSET_DATA (charbuf, last_offset, char_offset, last_id);
3118           last_id = charset->id;
3119           last_offset = char_offset;
3120         }
3121
3122       /* Now we know CHARSET and 1st position code C1 of a character.
3123          Produce a decoded character while getting 2nd position code
3124          C2 if necessary.  */
3125       c1 &= 0x7F;
3126       if (CHARSET_DIMENSION (charset) > 1)
3127         {
3128           ONE_MORE_BYTE (c2);
3129           if (c2 < 0x20 || (c2 >= 0x80 && c2 < 0xA0))
3130             /* C2 is not in a valid range.  */
3131             goto invalid_code;
3132           c1 = (c1 << 8) | (c2 & 0x7F);
3133           if (CHARSET_DIMENSION (charset) > 2)
3134             {
3135               ONE_MORE_BYTE (c2);
3136               if (c2 < 0x20 || (c2 >= 0x80 && c2 < 0xA0))
3137                 /* C2 is not in a valid range.  */
3138                 goto invalid_code;
3139               c1 = (c1 << 8) | (c2 & 0x7F);
3140             }
3141         }
3142
3143       CODING_DECODE_CHAR (coding, src, src_base, src_end, charset, c1, c);
3144       if (c < 0)
3145         {
3146           MAYBE_FINISH_COMPOSITION ();
3147           for (; src_base < src; src_base++, char_offset++)
3148             {
3149               if (ASCII_BYTE_P (*src_base))
3150                 *charbuf++ = *src_base;
3151               else
3152                 *charbuf++ = BYTE8_TO_CHAR (*src_base);
3153               char_offset++;
3154             }
3155         }
3156       else if (composition_state == COMPOSING_NO)
3157         {
3158           *charbuf++ = c;
3159           char_offset++;
3160         }
3161       else
3162         {
3163           components[component_idx++] = c;
3164           if (method == COMPOSITION_WITH_RULE
3165               || (method == COMPOSITION_WITH_RULE_ALTCHARS
3166                   && composition_state == COMPOSING_COMPONENT_CHAR))
3167             composition_state++;
3168         }
3169       continue;
3170
3171     invalid_code:
3172       MAYBE_FINISH_COMPOSITION ();
3173       src = src_base;
3174       consumed_chars = consumed_chars_base;
3175       ONE_MORE_BYTE (c);
3176       *charbuf++ = ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
3177       char_offset++;
3178       coding->errors++;
3179     }
3180
3181  no_more_source:
3182   if (last_id != charset_ascii)
3183     ADD_CHARSET_DATA (charbuf, last_offset, char_offset, last_id);
3184   coding->consumed_char += consumed_chars_base;
3185   coding->consumed = src_base - coding->source;
3186   coding->charbuf_used = charbuf - coding->charbuf;
3187 }
3188
3189
3190 /* ISO2022 encoding stuff.  */
3191
3192 /*
3193    It is not enough to say just "ISO2022" on encoding, we have to
3194    specify more details.  In Emacs, each coding system of ISO2022
3195    variant has the following specifications:
3196         1. Initial designation to G0 thru G3.
3197         2. Allows short-form designation?
3198         3. ASCII should be designated to G0 before control characters?
3199         4. ASCII should be designated to G0 at end of line?
3200         5. 7-bit environment or 8-bit environment?
3201         6. Use locking-shift?
3202         7. Use Single-shift?
3203    And the following two are only for Japanese:
3204         8. Use ASCII in place of JIS0201-1976-Roman?
3205         9. Use JISX0208-1983 in place of JISX0208-1978?
3206    These specifications are encoded in CODING_ISO_FLAGS (coding) as flag bits
3207    defined by macros CODING_ISO_FLAG_XXX.  See `coding.h' for more
3208    details.
3209 */
3210
3211 /* Produce codes (escape sequence) for designating CHARSET to graphic
3212    register REG at DST, and increment DST.  If <final-char> of CHARSET is
3213    '@', 'A', or 'B' and the coding system CODING allows, produce
3214    designation sequence of short-form.  */
3215
3216 #define ENCODE_DESIGNATION(charset, reg, coding)                        \
3217   do {                                                                  \
3218     unsigned char final_char = CHARSET_ISO_FINAL (charset);             \
3219     char *intermediate_char_94 = "()*+";                                \
3220     char *intermediate_char_96 = ",-./";                                \
3221     int revision = -1;                                                  \
3222     int c;                                                              \
3223                                                                         \
3224     if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_REVISION)           \
3225       revision = CHARSET_ISO_REVISION (charset);                        \
3226                                                                         \
3227     if (revision >= 0)                                                  \
3228       {                                                                 \
3229         EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, '&');                       \
3230         EMIT_ONE_BYTE ('@' + revision);                                 \
3231       }                                                                 \
3232     EMIT_ONE_ASCII_BYTE (ISO_CODE_ESC);                                 \
3233     if (CHARSET_DIMENSION (charset) == 1)                               \
3234       {                                                                 \
3235         if (! CHARSET_ISO_CHARS_96 (charset))                           \
3236           c = intermediate_char_94[reg];                                \
3237         else                                                            \
3238           c = intermediate_char_96[reg];                                \
3239         EMIT_ONE_ASCII_BYTE (c);                                        \
3240       }                                                                 \
3241     else                                                                \
3242       {                                                                 \
3243         EMIT_ONE_ASCII_BYTE ('$');                                      \
3244         if (! CHARSET_ISO_CHARS_96 (charset))                           \
3245           {                                                             \
3246             if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LONG_FORM   \
3247                 || reg != 0                                             \
3248                 || final_char < '@' || final_char > 'B')                \
3249               EMIT_ONE_ASCII_BYTE (intermediate_char_94[reg]);          \
3250           }                                                             \
3251         else                                                            \
3252           EMIT_ONE_ASCII_BYTE (intermediate_char_96[reg]);              \
3253       }                                                                 \
3254     EMIT_ONE_ASCII_BYTE (final_char);                                   \
3255                                                                         \
3256     CODING_ISO_DESIGNATION (coding, reg) = CHARSET_ID (charset);        \
3257   } while (0)
3258
3259
3260 /* The following two macros produce codes (control character or escape
3261    sequence) for ISO2022 single-shift functions (single-shift-2 and
3262    single-shift-3).  */
3263
3264 #define ENCODE_SINGLE_SHIFT_2                                           \
3265   do {                                                                  \
3266     if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS)         \
3267       EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'N');                         \
3268     else                                                                \
3269       EMIT_ONE_BYTE (ISO_CODE_SS2);                                     \
3270     CODING_ISO_SINGLE_SHIFTING (coding) = 1;                            \
3271   } while (0)
3272
3273
3274 #define ENCODE_SINGLE_SHIFT_3                                           \
3275   do {                                                                  \
3276     if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS)         \
3277       EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'O');                         \
3278     else                                                                \
3279       EMIT_ONE_BYTE (ISO_CODE_SS3);                                     \
3280     CODING_ISO_SINGLE_SHIFTING (coding) = 1;                            \
3281   } while (0)
3282
3283
3284 /* The following four macros produce codes (control character or
3285    escape sequence) for ISO2022 locking-shift functions (shift-in,
3286    shift-out, locking-shift-2, and locking-shift-3).  */
3287
3288 #define ENCODE_SHIFT_IN                                 \
3289   do {                                                  \
3290     EMIT_ONE_ASCII_BYTE (ISO_CODE_SI);                  \
3291     CODING_ISO_INVOCATION (coding, 0) = 0;              \
3292   } while (0)
3293
3294
3295 #define ENCODE_SHIFT_OUT                                \
3296   do {                                                  \
3297     EMIT_ONE_ASCII_BYTE (ISO_CODE_SO);                  \
3298     CODING_ISO_INVOCATION (coding, 0) = 1;              \
3299   } while (0)
3300
3301
3302 #define ENCODE_LOCKING_SHIFT_2                          \
3303   do {                                                  \
3304     EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'n');           \
3305     CODING_ISO_INVOCATION (coding, 0) = 2;              \
3306   } while (0)
3307
3308
3309 #define ENCODE_LOCKING_SHIFT_3                          \
3310   do {                                                  \
3311     EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'n');           \
3312     CODING_ISO_INVOCATION (coding, 0) = 3;              \
3313   } while (0)
3314
3315
3316 /* Produce codes for a DIMENSION1 character whose character set is
3317    CHARSET and whose position-code is C1.  Designation and invocation
3318    sequences are also produced in advance if necessary.  */
3319
3320 #define ENCODE_ISO_CHARACTER_DIMENSION1(charset, c1)                    \
3321   do {                                                                  \
3322     int id = CHARSET_ID (charset);                                      \
3323                                                                         \
3324     if ((CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_USE_ROMAN)         \
3325         && id == charset_ascii)                                         \
3326       {                                                                 \
3327         id = charset_jisx0201_roman;                                    \
3328         charset = CHARSET_FROM_ID (id);                                 \
3329       }                                                                 \
3330                                                                         \
3331     if (CODING_ISO_SINGLE_SHIFTING (coding))                            \
3332       {                                                                 \
3333         if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS)     \
3334           EMIT_ONE_ASCII_BYTE (c1 & 0x7F);                              \
3335         else                                                            \
3336           EMIT_ONE_BYTE (c1 | 0x80);                                    \
3337         CODING_ISO_SINGLE_SHIFTING (coding) = 0;                        \
3338         break;                                                          \
3339       }                                                                 \
3340     else if (id == CODING_ISO_INVOKED_CHARSET (coding, 0))              \
3341       {                                                                 \
3342         EMIT_ONE_ASCII_BYTE (c1 & 0x7F);                                \
3343         break;                                                          \
3344       }                                                                 \
3345     else if (id == CODING_ISO_INVOKED_CHARSET (coding, 1))              \
3346       {                                                                 \
3347         EMIT_ONE_BYTE (c1 | 0x80);                                      \
3348         break;                                                          \
3349       }                                                                 \
3350     else                                                                \
3351       /* Since CHARSET is not yet invoked to any graphic planes, we     \
3352          must invoke it, or, at first, designate it to some graphic     \
3353          register.  Then repeat the loop to actually produce the        \
3354          character.  */                                                 \
3355       dst = encode_invocation_designation (charset, coding, dst,        \
3356                                            &produced_chars);            \
3357   } while (1)
3358
3359
3360 /* Produce codes for a DIMENSION2 character whose character set is
3361    CHARSET and whose position-codes are C1 and C2.  Designation and
3362    invocation codes are also produced in advance if necessary.  */
3363
3364 #define ENCODE_ISO_CHARACTER_DIMENSION2(charset, c1, c2)                \
3365   do {                                                                  \
3366     int id = CHARSET_ID (charset);                                      \
3367                                                                         \
3368     if ((CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_USE_OLDJIS)        \
3369         && id == charset_jisx0208)                                      \
3370       {                                                                 \
3371         id = charset_jisx0208_1978;                                     \
3372         charset = CHARSET_FROM_ID (id);                                 \
3373       }                                                                 \
3374                                                                         \
3375     if (CODING_ISO_SINGLE_SHIFTING (coding))                            \
3376       {                                                                 \
3377         if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS)     \
3378           EMIT_TWO_ASCII_BYTES ((c1) & 0x7F, (c2) & 0x7F);              \
3379         else                                                            \
3380           EMIT_TWO_BYTES ((c1) | 0x80, (c2) | 0x80);                    \
3381         CODING_ISO_SINGLE_SHIFTING (coding) = 0;                        \
3382         break;                                                          \
3383       }                                                                 \
3384     else if (id == CODING_ISO_INVOKED_CHARSET (coding, 0))              \
3385       {                                                                 \
3386         EMIT_TWO_ASCII_BYTES ((c1) & 0x7F, (c2) & 0x7F);                \
3387         break;                                                          \
3388       }                                                                 \
3389     else if (id == CODING_ISO_INVOKED_CHARSET (coding, 1))              \
3390       {                                                                 \
3391         EMIT_TWO_BYTES ((c1) | 0x80, (c2) | 0x80);                      \
3392         break;                                                          \
3393       }                                                                 \
3394     else                                                                \
3395       /* Since CHARSET is not yet invoked to any graphic planes, we     \
3396          must invoke it, or, at first, designate it to some graphic     \
3397          register.  Then repeat the loop to actually produce the        \
3398          character.  */                                                 \
3399       dst = encode_invocation_designation (charset, coding, dst,        \
3400                                            &produced_chars);            \
3401   } while (1)
3402
3403
3404 #define ENCODE_ISO_CHARACTER(charset, c)                                   \
3405   do {                                                                     \
3406     int code = ENCODE_CHAR ((charset),(c));                                \
3407                                                                            \
3408     if (CHARSET_DIMENSION (charset) == 1)                                  \
3409       ENCODE_ISO_CHARACTER_DIMENSION1 ((charset), code);                   \
3410     else                                                                   \
3411       ENCODE_ISO_CHARACTER_DIMENSION2 ((charset), code >> 8, code & 0xFF); \
3412   } while (0)
3413
3414
3415 /* Produce designation and invocation codes at a place pointed by DST
3416    to use CHARSET.  The element `spec.iso_2022' of *CODING is updated.
3417    Return new DST.  */
3418
3419 unsigned char *
3420 encode_invocation_designation (charset, coding, dst, p_nchars)
3421      struct charset *charset;
3422      struct coding_system *coding;
3423      unsigned char *dst;
3424      int *p_nchars;
3425 {
3426   int multibytep = coding->dst_multibyte;
3427   int produced_chars = *p_nchars;
3428   int reg;                      /* graphic register number */
3429   int id = CHARSET_ID (charset);
3430
3431   /* At first, check designations.  */
3432   for (reg = 0; reg < 4; reg++)
3433     if (id == CODING_ISO_DESIGNATION (coding, reg))
3434       break;
3435
3436   if (reg >= 4)
3437     {
3438       /* CHARSET is not yet designated to any graphic registers.  */
3439       /* At first check the requested designation.  */
3440       reg = CODING_ISO_REQUEST (coding, id);
3441       if (reg < 0)
3442         /* Since CHARSET requests no special designation, designate it
3443            to graphic register 0.  */
3444         reg = 0;
3445
3446       ENCODE_DESIGNATION (charset, reg, coding);
3447     }
3448
3449   if (CODING_ISO_INVOCATION (coding, 0) != reg
3450       && CODING_ISO_INVOCATION (coding, 1) != reg)
3451     {
3452       /* Since the graphic register REG is not invoked to any graphic
3453          planes, invoke it to graphic plane 0.  */
3454       switch (reg)
3455         {
3456         case 0:                 /* graphic register 0 */
3457           ENCODE_SHIFT_IN;
3458           break;
3459
3460         case 1:                 /* graphic register 1 */
3461           ENCODE_SHIFT_OUT;
3462           break;
3463
3464         case 2:                 /* graphic register 2 */
3465           if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT)
3466             ENCODE_SINGLE_SHIFT_2;
3467           else
3468             ENCODE_LOCKING_SHIFT_2;
3469           break;
3470
3471         case 3:                 /* graphic register 3 */
3472           if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT)
3473             ENCODE_SINGLE_SHIFT_3;
3474           else
3475             ENCODE_LOCKING_SHIFT_3;
3476           break;
3477         }
3478     }
3479
3480   *p_nchars = produced_chars;
3481   return dst;
3482 }
3483
3484 /* The following three macros produce codes for indicating direction
3485    of text.  */
3486 #define ENCODE_CONTROL_SEQUENCE_INTRODUCER                              \
3487   do {                                                                  \
3488     if (CODING_ISO_FLAGS (coding) == CODING_ISO_FLAG_SEVEN_BITS)        \
3489       EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, '[');                         \
3490     else                                                                \
3491       EMIT_ONE_BYTE (ISO_CODE_CSI);                                     \
3492   } while (0)
3493
3494
3495 #define ENCODE_DIRECTION_R2L()                  \
3496   do {                                          \
3497     ENCODE_CONTROL_SEQUENCE_INTRODUCER (dst);   \
3498     EMIT_TWO_ASCII_BYTES ('2', ']');            \
3499   } while (0)
3500
3501
3502 #define ENCODE_DIRECTION_L2R()                  \
3503   do {                                          \
3504     ENCODE_CONTROL_SEQUENCE_INTRODUCER (dst);   \
3505     EMIT_TWO_ASCII_BYTES ('0', ']');            \
3506   } while (0)
3507
3508
3509 /* Produce codes for designation and invocation to reset the graphic
3510    planes and registers to initial state.  */
3511 #define ENCODE_RESET_PLANE_AND_REGISTER()                               \
3512   do {                                                                  \
3513     int reg;                                                            \
3514     struct charset *charset;                                            \
3515                                                                         \
3516     if (CODING_ISO_INVOCATION (coding, 0) != 0)                         \
3517       ENCODE_SHIFT_IN;                                                  \
3518     for (reg = 0; reg < 4; reg++)                                       \
3519       if (CODING_ISO_INITIAL (coding, reg) >= 0                         \
3520           && (CODING_ISO_DESIGNATION (coding, reg)                      \
3521               != CODING_ISO_INITIAL (coding, reg)))                     \
3522         {                                                               \
3523           charset = CHARSET_FROM_ID (CODING_ISO_INITIAL (coding, reg)); \
3524           ENCODE_DESIGNATION (charset, reg, coding);                    \
3525         }                                                               \
3526   } while (0)
3527
3528
3529 /* Produce designation sequences of charsets in the line started from
3530    SRC to a place pointed by DST, and return updated DST.
3531
3532    If the current block ends before any end-of-line, we may fail to
3533    find all the necessary designations.  */
3534
3535 static unsigned char *
3536 encode_designation_at_bol (coding, charbuf, charbuf_end, dst)
3537      struct coding_system *coding;
3538      int *charbuf, *charbuf_end;
3539      unsigned char *dst;
3540 {
3541   struct charset *charset;
3542   /* Table of charsets to be designated to each graphic register.  */
3543   int r[4];
3544   int c, found = 0, reg;
3545   int produced_chars = 0;
3546   int multibytep = coding->dst_multibyte;
3547   Lisp_Object attrs;
3548   Lisp_Object charset_list;
3549
3550   attrs = CODING_ID_ATTRS (coding->id);
3551   charset_list = CODING_ATTR_CHARSET_LIST (attrs);
3552   if (EQ (charset_list, Qiso_2022))
3553     charset_list = Viso_2022_charset_list;
3554
3555   for (reg = 0; reg < 4; reg++)
3556     r[reg] = -1;
3557
3558   while (found < 4)
3559     {
3560       int id;
3561
3562       c = *charbuf++;
3563       if (c == '\n')
3564         break;
3565       charset = char_charset (c, charset_list, NULL);
3566       id = CHARSET_ID (charset);
3567       reg = CODING_ISO_REQUEST (coding, id);
3568       if (reg >= 0 && r[reg] < 0)
3569         {
3570           found++;
3571           r[reg] = id;
3572         }
3573     }
3574
3575   if (found)
3576     {
3577       for (reg = 0; reg < 4; reg++)
3578         if (r[reg] >= 0
3579             && CODING_ISO_DESIGNATION (coding, reg) != r[reg])
3580           ENCODE_DESIGNATION (CHARSET_FROM_ID (r[reg]), reg, coding);
3581     }
3582
3583   return dst;
3584 }
3585
3586 /* See the above "GENERAL NOTES on `encode_coding_XXX ()' functions".  */
3587
3588 static int
3589 encode_coding_iso_2022 (coding)
3590      struct coding_system *coding;
3591 {
3592   int multibytep = coding->dst_multibyte;
3593   int *charbuf = coding->charbuf;
3594   int *charbuf_end = charbuf + coding->charbuf_used;
3595   unsigned char *dst = coding->destination + coding->produced;
3596   unsigned char *dst_end = coding->destination + coding->dst_bytes;
3597   int safe_room = 16;
3598   int bol_designation
3599     = (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DESIGNATE_AT_BOL
3600        && CODING_ISO_BOL (coding));
3601   int produced_chars = 0;
3602   Lisp_Object attrs, eol_type, charset_list;
3603   int ascii_compatible;
3604   int c;
3605   int preferred_charset_id = -1;
3606
3607   CODING_GET_INFO (coding, attrs, eol_type, charset_list);
3608   setup_iso_safe_charsets (attrs);
3609   /* Charset list may have been changed.  */
3610   charset_list = CODING_ATTR_CHARSET_LIST (attrs);              \
3611   coding->safe_charsets
3612     = (char *) XSTRING (CODING_ATTR_SAFE_CHARSETS(attrs))->data;
3613
3614   ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
3615
3616   while (charbuf < charbuf_end)
3617     {
3618       ASSURE_DESTINATION (safe_room);
3619
3620       if (bol_designation)
3621         {
3622           unsigned char *dst_prev = dst;
3623
3624           /* We have to produce designation sequences if any now.  */
3625           dst = encode_designation_at_bol (coding, charbuf, charbuf_end, dst);
3626           bol_designation = 0;
3627           /* We are sure that designation sequences are all ASCII bytes.  */
3628           produced_chars += dst - dst_prev;
3629         }
3630
3631       c = *charbuf++;
3632
3633       if (c < 0)
3634         {
3635           /* Handle an annotation.  */
3636           switch (*charbuf)
3637             {
3638             case CODING_ANNOTATE_COMPOSITION_MASK:
3639               /* Not yet implemented.  */
3640               break;
3641             case CODING_ANNOTATE_CHARSET_MASK:
3642               preferred_charset_id = charbuf[3];
3643               if (preferred_charset_id >= 0
3644                   && NILP (Fmemq (make_number (preferred_charset_id),
3645                                   charset_list)))
3646                 preferred_charset_id = -1;
3647               break;
3648             default:
3649               abort ();
3650             }
3651           charbuf += -c - 1;
3652           continue;
3653         }
3654
3655       /* Now encode the character C.  */
3656       if (c < 0x20 || c == 0x7F)
3657         {
3658           if (c == '\n'
3659               || (c == '\r' && EQ (eol_type, Qmac)))
3660             {
3661               if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_RESET_AT_EOL)
3662                 ENCODE_RESET_PLANE_AND_REGISTER ();
3663               if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_INIT_AT_BOL)
3664                 {
3665                   int i;
3666
3667                   for (i = 0; i < 4; i++)
3668                     CODING_ISO_DESIGNATION (coding, i)
3669                       = CODING_ISO_INITIAL (coding, i);
3670                 }
3671               bol_designation
3672                 = CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DESIGNATE_AT_BOL;
3673             }
3674           else if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_RESET_AT_CNTL)
3675             ENCODE_RESET_PLANE_AND_REGISTER ();
3676           EMIT_ONE_ASCII_BYTE (c);
3677         }
3678       else if (ASCII_CHAR_P (c))
3679         {
3680           if (ascii_compatible)
3681             EMIT_ONE_ASCII_BYTE (c);
3682           else
3683             {
3684               struct charset *charset = CHARSET_FROM_ID (charset_ascii);
3685               ENCODE_ISO_CHARACTER (charset, c);
3686             }
3687         }
3688       else if (CHAR_BYTE8_P (c))
3689         {
3690           c = CHAR_TO_BYTE8 (c);
3691           EMIT_ONE_BYTE (c);
3692         }
3693       else
3694         {
3695           struct charset *charset;
3696
3697           if (preferred_charset_id >= 0)
3698             {
3699               charset = CHARSET_FROM_ID (preferred_charset_id);
3700               if (! CHAR_CHARSET_P (c, charset))
3701                 charset = char_charset (c, charset_list, NULL);
3702             }
3703           else
3704             charset = char_charset (c, charset_list, NULL);
3705           if (!charset)
3706             {
3707               if (coding->mode & CODING_MODE_SAFE_ENCODING)
3708                 {
3709                   c = CODING_INHIBIT_CHARACTER_SUBSTITUTION;
3710                   charset = CHARSET_FROM_ID (charset_ascii);
3711                 }
3712               else
3713                 {
3714                   c = coding->default_char;
3715                   charset = char_charset (c, charset_list, NULL);
3716                 }
3717             }
3718           ENCODE_ISO_CHARACTER (charset, c);
3719         }
3720     }
3721
3722   if (coding->mode & CODING_MODE_LAST_BLOCK
3723       && CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_RESET_AT_EOL)
3724     {
3725       ASSURE_DESTINATION (safe_room);
3726       ENCODE_RESET_PLANE_AND_REGISTER ();
3727     }
3728   coding->result = CODING_RESULT_SUCCESS;
3729   CODING_ISO_BOL (coding) = bol_designation;
3730   coding->produced_char += produced_chars;
3731   coding->produced = dst - coding->destination;
3732   return 0;
3733 }
3734
3735 \f
3736 /*** 8,9. SJIS and BIG5 handlers ***/
3737
3738 /* Although SJIS and BIG5 are not ISO's coding system, they are used
3739    quite widely.  So, for the moment, Emacs supports them in the bare
3740    C code.  But, in the future, they may be supported only by CCL.  */
3741
3742 /* SJIS is a coding system encoding three character sets: ASCII, right
3743    half of JISX0201-Kana, and JISX0208.  An ASCII character is encoded
3744    as is.  A character of charset katakana-jisx0201 is encoded by
3745    "position-code + 0x80".  A character of charset japanese-jisx0208
3746    is encoded in 2-byte but two position-codes are divided and shifted
3747    so that it fit in the range below.
3748
3749    --- CODE RANGE of SJIS ---
3750    (character set)      (range)
3751    ASCII                0x00 .. 0x7F
3752    KATAKANA-JISX0201    0xA0 .. 0xDF
3753    JISX0208 (1st byte)  0x81 .. 0x9F and 0xE0 .. 0xEF
3754             (2nd byte)  0x40 .. 0x7E and 0x80 .. 0xFC
3755    -------------------------------
3756
3757 */
3758
3759 /* BIG5 is a coding system encoding two character sets: ASCII and
3760    Big5.  An ASCII character is encoded as is.  Big5 is a two-byte
3761    character set and is encoded in two-byte.
3762
3763    --- CODE RANGE of BIG5 ---
3764    (character set)      (range)
3765    ASCII                0x00 .. 0x7F
3766    Big5 (1st byte)      0xA1 .. 0xFE
3767         (2nd byte)      0x40 .. 0x7E and 0xA1 .. 0xFE
3768    --------------------------
3769
3770   */
3771
3772 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
3773    Check if a text is encoded in SJIS.  If it is, return
3774    CATEGORY_MASK_SJIS, else return 0.  */
3775
3776 static int
3777 detect_coding_sjis (coding, detect_info)
3778      struct coding_system *coding;
3779      struct coding_detection_info *detect_info;
3780 {
3781   unsigned char *src = coding->source, *src_base = src;
3782   unsigned char *src_end = coding->source + coding->src_bytes;
3783   int multibytep = coding->src_multibyte;
3784   int consumed_chars = 0;
3785   int found = 0;
3786   int c;
3787   int incomplete;
3788
3789   detect_info->checked |= CATEGORY_MASK_SJIS;
3790   /* A coding system of this category is always ASCII compatible.  */
3791   src += coding->head_ascii;
3792
3793   while (1)
3794     {
3795       incomplete = 0;
3796       ONE_MORE_BYTE (c);
3797       incomplete = 1;
3798       if (c < 0x80)
3799         continue;
3800       if ((c >= 0x81 && c <= 0x9F) || (c >= 0xE0 && c <= 0xEF))
3801         {
3802           ONE_MORE_BYTE (c);
3803           if (c < 0x40 || c == 0x7F || c > 0xFC)
3804             break;
3805           found = CATEGORY_MASK_SJIS;
3806         }
3807       else if (c >= 0xA0 && c < 0xE0)
3808         found = CATEGORY_MASK_SJIS;
3809       else
3810         break;
3811     }
3812   detect_info->rejected |= CATEGORY_MASK_SJIS;
3813   return 0;
3814
3815  no_more_source:
3816   if (incomplete && coding->mode & CODING_MODE_LAST_BLOCK)
3817     {
3818       detect_info->rejected |= CATEGORY_MASK_SJIS;
3819       return 0;
3820     }
3821   detect_info->found |= found;
3822   return 1;
3823 }
3824
3825 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
3826    Check if a text is encoded in BIG5.  If it is, return
3827    CATEGORY_MASK_BIG5, else return 0.  */
3828
3829 static int
3830 detect_coding_big5 (coding, detect_info)
3831      struct coding_system *coding;
3832      struct coding_detection_info *detect_info;
3833 {
3834   unsigned char *src = coding->source, *src_base = src;
3835   unsigned char *src_end = coding->source + coding->src_bytes;
3836   int multibytep = coding->src_multibyte;
3837   int consumed_chars = 0;
3838   int found = 0;
3839   int c;
3840   int incomplete;
3841
3842   detect_info->checked |= CATEGORY_MASK_BIG5;
3843   /* A coding system of this category is always ASCII compatible.  */
3844   src += coding->head_ascii;
3845
3846   while (1)
3847     {
3848       incomplete = 0;
3849       ONE_MORE_BYTE (c);
3850       incomplete = 1;
3851       if (c < 0x80)
3852         continue;
3853       if (c >= 0xA1)
3854         {
3855           ONE_MORE_BYTE (c);
3856           if (c < 0x40 || (c >= 0x7F && c <= 0xA0))
3857             return 0;
3858           found = CATEGORY_MASK_BIG5;
3859         }
3860       else
3861         break;
3862     }
3863   detect_info->rejected |= CATEGORY_MASK_BIG5;
3864   return 0;
3865
3866  no_more_source:
3867   if (incomplete && coding->mode & CODING_MODE_LAST_BLOCK)
3868     {
3869       detect_info->rejected |= CATEGORY_MASK_BIG5;
3870       return 0;
3871     }
3872   detect_info->found |= found;
3873   return 1;
3874 }
3875
3876 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".
3877    If SJIS_P is 1, decode SJIS text, else decode BIG5 test.  */
3878
3879 static void
3880 decode_coding_sjis (coding)
3881      struct coding_system *coding;
3882 {
3883   unsigned char *src = coding->source + coding->consumed;
3884   unsigned char *src_end = coding->source + coding->src_bytes;
3885   unsigned char *src_base;
3886   int *charbuf = coding->charbuf;
3887   int *charbuf_end = charbuf + coding->charbuf_size - MAX_ANNOTATION_LENGTH;
3888   int consumed_chars = 0, consumed_chars_base;
3889   int multibytep = coding->src_multibyte;
3890   struct charset *charset_roman, *charset_kanji, *charset_kana;
3891   Lisp_Object attrs, eol_type, charset_list, val;
3892   int char_offset = coding->produced_char;
3893   int last_offset = char_offset;
3894   int last_id = charset_ascii;
3895
3896   CODING_GET_INFO (coding, attrs, eol_type, charset_list);
3897
3898   val = charset_list;
3899   charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
3900   charset_kana = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
3901   charset_kanji = CHARSET_FROM_ID (XINT (XCAR (val)));
3902
3903   while (1)
3904     {
3905       int c, c1;
3906
3907       src_base = src;
3908       consumed_chars_base = consumed_chars;
3909
3910       if (charbuf >= charbuf_end)
3911         break;
3912
3913       ONE_MORE_BYTE (c);
3914
3915       if (c == '\r')
3916         {
3917           if (EQ (eol_type, Qdos))
3918             {
3919               if (src == src_end)
3920                 {
3921                   coding->result = CODING_RESULT_INSUFFICIENT_SRC;
3922                   goto no_more_source;
3923                 }
3924               if (*src == '\n')
3925                 ONE_MORE_BYTE (c);
3926             }
3927           else if (EQ (eol_type, Qmac))
3928             c = '\n';
3929         }
3930       else
3931         {
3932           struct charset *charset;
3933
3934           if (c < 0x80)
3935             charset = charset_roman;
3936           else
3937             {
3938               if (c >= 0xF0)
3939                 goto invalid_code;
3940               if (c < 0xA0 || c >= 0xE0)
3941                 {
3942                   /* SJIS -> JISX0208 */
3943                   ONE_MORE_BYTE (c1);
3944                   if (c1 < 0x40 || c1 == 0x7F || c1 > 0xFC)
3945                     goto invalid_code;
3946                   c = (c << 8) | c1;
3947                   SJIS_TO_JIS (c);
3948                   charset = charset_kanji;
3949                 }
3950               else
3951                 {
3952                   /* SJIS -> JISX0201-Kana */
3953                   c &= 0x7F;
3954                   charset = charset_kana;
3955                 }
3956             }
3957           if (charset->id != charset_ascii
3958               && last_id != charset->id)
3959             {
3960               if (last_id != charset_ascii)
3961                 ADD_CHARSET_DATA (charbuf, last_offset, char_offset, last_id);
3962               last_id = charset->id;
3963               last_offset = char_offset;
3964             }
3965           CODING_DECODE_CHAR (coding, src, src_base, src_end, charset, c, c);
3966         }
3967       *charbuf++ = c;
3968       char_offset++;
3969       continue;
3970
3971     invalid_code:
3972       src = src_base;
3973       consumed_chars = consumed_chars_base;
3974       ONE_MORE_BYTE (c);
3975       *charbuf++ = ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
3976       char_offset++;
3977       coding->errors++;
3978     }
3979
3980  no_more_source:
3981   if (last_id != charset_ascii)
3982     ADD_CHARSET_DATA (charbuf, last_offset, char_offset, last_id);
3983   coding->consumed_char += consumed_chars_base;
3984   coding->consumed = src_base - coding->source;
3985   coding->charbuf_used = charbuf - coding->charbuf;
3986 }
3987
3988 static void
3989 decode_coding_big5 (coding)
3990      struct coding_system *coding;
3991 {
3992   unsigned char *src = coding->source + coding->consumed;
3993   unsigned char *src_end = coding->source + coding->src_bytes;
3994   unsigned char *src_base;
3995   int *charbuf = coding->charbuf;
3996   int *charbuf_end = charbuf + coding->charbuf_size - MAX_ANNOTATION_LENGTH;
3997   int consumed_chars = 0, consumed_chars_base;
3998   int multibytep = coding->src_multibyte;
3999   struct charset *charset_roman, *charset_big5;
4000   Lisp_Object attrs, eol_type, charset_list, val;
4001   int char_offset = coding->produced_char;
4002   int last_offset = char_offset;
4003   int last_id = charset_ascii;
4004
4005   CODING_GET_INFO (coding, attrs, eol_type, charset_list);
4006   val = charset_list;
4007   charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
4008   charset_big5 = CHARSET_FROM_ID (XINT (XCAR (val)));
4009
4010   while (1)
4011     {
4012       int c, c1;
4013
4014       src_base = src;
4015       consumed_chars_base = consumed_chars;
4016
4017       if (charbuf >= charbuf_end)
4018         break;
4019
4020       ONE_MORE_BYTE (c);
4021
4022       if (c == '\r')
4023         {
4024           if (EQ (eol_type, Qdos))
4025             {
4026               if (src == src_end)
4027                 {
4028                   coding->result = CODING_RESULT_INSUFFICIENT_SRC;
4029                   goto no_more_source;
4030                 }
4031               if (*src == '\n')
4032                 ONE_MORE_BYTE (c);
4033             }
4034           else if (EQ (eol_type, Qmac))
4035             c = '\n';
4036         }
4037       else
4038         {
4039           struct charset *charset;
4040           if (c < 0x80)
4041             charset = charset_roman;
4042           else
4043             {
4044               /* BIG5 -> Big5 */
4045               if (c < 0xA1 || c > 0xFE)
4046                 goto invalid_code;
4047               ONE_MORE_BYTE (c1);
4048               if (c1 < 0x40 || (c1 > 0x7E && c1 < 0xA1) || c1 > 0xFE)
4049                 goto invalid_code;
4050               c = c << 8 | c1;
4051               charset = charset_big5;
4052             }
4053           if (charset->id != charset_ascii
4054               && last_id != charset->id)
4055             {
4056               if (last_id != charset_ascii)
4057                 ADD_CHARSET_DATA (charbuf, last_offset, char_offset, last_id);
4058               last_id = charset->id;
4059               last_offset = char_offset;
4060             }
4061           CODING_DECODE_CHAR (coding, src, src_base, src_end, charset, c, c);
4062         }
4063
4064       *charbuf++ = c;
4065       char_offset++;
4066       continue;
4067
4068     invalid_code:
4069       src = src_base;
4070       consumed_chars = consumed_chars_base;
4071       ONE_MORE_BYTE (c);
4072       *charbuf++ = ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
4073       char_offset++;
4074       coding->errors++;
4075     }
4076
4077  no_more_source:
4078   if (last_id != charset_ascii)
4079     ADD_CHARSET_DATA (charbuf, last_offset, char_offset, last_id);
4080   coding->consumed_char += consumed_chars_base;
4081   coding->consumed = src_base - coding->source;
4082   coding->charbuf_used = charbuf - coding->charbuf;
4083 }
4084
4085 /* See the above "GENERAL NOTES on `encode_coding_XXX ()' functions".
4086    This function can encode charsets `ascii', `katakana-jisx0201',
4087    `japanese-jisx0208', `chinese-big5-1', and `chinese-big5-2'.  We
4088    are sure that all these charsets are registered as official charset
4089    (i.e. do not have extended leading-codes).  Characters of other
4090    charsets are produced without any encoding.  If SJIS_P is 1, encode
4091    SJIS text, else encode BIG5 text.  */
4092
4093 static int
4094 encode_coding_sjis (coding)
4095      struct coding_system *coding;
4096 {
4097   int multibytep = coding->dst_multibyte;
4098   int *charbuf = coding->charbuf;
4099   int *charbuf_end = charbuf + coding->charbuf_used;
4100   unsigned char *dst = coding->destination + coding->produced;
4101   unsigned char *dst_end = coding->destination + coding->dst_bytes;
4102   int safe_room = 4;
4103   int produced_chars = 0;
4104   Lisp_Object attrs, eol_type, charset_list, val;
4105   int ascii_compatible;
4106   struct charset *charset_roman, *charset_kanji, *charset_kana;
4107   int c;
4108
4109   CODING_GET_INFO (coding, attrs, eol_type, charset_list);
4110   val = charset_list;
4111   charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
4112   charset_kana = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
4113   charset_kanji = CHARSET_FROM_ID (XINT (XCAR (val)));
4114
4115   ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
4116
4117   while (charbuf < charbuf_end)
4118     {
4119       ASSURE_DESTINATION (safe_room);
4120       c = *charbuf++;
4121       /* Now encode the character C.  */
4122       if (ASCII_CHAR_P (c) && ascii_compatible)
4123         EMIT_ONE_ASCII_BYTE (c);
4124       else if (CHAR_BYTE8_P (c))
4125         {
4126           c = CHAR_TO_BYTE8 (c);
4127           EMIT_ONE_BYTE (c);
4128         }
4129       else
4130         {
4131           unsigned code;
4132           struct charset *charset = char_charset (c, charset_list, &code);
4133
4134           if (!charset)
4135             {
4136               if (coding->mode & CODING_MODE_SAFE_ENCODING)
4137                 {
4138                   code = CODING_INHIBIT_CHARACTER_SUBSTITUTION;
4139                   charset = CHARSET_FROM_ID (charset_ascii);
4140                 }
4141               else
4142                 {
4143                   c = coding->default_char;
4144                   charset = char_charset (c, charset_list, &code);
4145                 }
4146             }
4147           if (code == CHARSET_INVALID_CODE (charset))
4148             abort ();
4149           if (charset == charset_kanji)
4150             {
4151               int c1, c2;
4152               JIS_TO_SJIS (code);
4153               c1 = code >> 8, c2 = code & 0xFF;
4154               EMIT_TWO_BYTES (c1, c2);
4155             }
4156           else if (charset == charset_kana)
4157             EMIT_ONE_BYTE (code | 0x80);
4158           else
4159             EMIT_ONE_ASCII_BYTE (code & 0x7F);
4160         }
4161     }
4162   coding->result = CODING_RESULT_SUCCESS;
4163   coding->produced_char += produced_chars;
4164   coding->produced = dst - coding->destination;
4165   return 0;
4166 }
4167
4168 static int
4169 encode_coding_big5 (coding)
4170      struct coding_system *coding;
4171 {
4172   int multibytep = coding->dst_multibyte;
4173   int *charbuf = coding->charbuf;
4174   int *charbuf_end = charbuf + coding->charbuf_used;
4175   unsigned char *dst = coding->destination + coding->produced;
4176   unsigned char *dst_end = coding->destination + coding->dst_bytes;
4177   int safe_room = 4;
4178   int produced_chars = 0;
4179   Lisp_Object attrs, eol_type, charset_list, val;
4180   int ascii_compatible;
4181   struct charset *charset_roman, *charset_big5;
4182   int c;
4183
4184   CODING_GET_INFO (coding, attrs, eol_type, charset_list);
4185   val = charset_list;
4186   charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
4187   charset_big5 = CHARSET_FROM_ID (XINT (XCAR (val)));
4188   ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
4189
4190   while (charbuf < charbuf_end)
4191     {
4192       ASSURE_DESTINATION (safe_room);
4193       c = *charbuf++;
4194       /* Now encode the character C.  */
4195       if (ASCII_CHAR_P (c) && ascii_compatible)
4196         EMIT_ONE_ASCII_BYTE (c);
4197       else if (CHAR_BYTE8_P (c))
4198         {
4199           c = CHAR_TO_BYTE8 (c);
4200           EMIT_ONE_BYTE (c);
4201         }
4202       else
4203         {
4204           unsigned code;
4205           struct charset *charset = char_charset (c, charset_list, &code);
4206
4207           if (! charset)
4208             {
4209               if (coding->mode & CODING_MODE_SAFE_ENCODING)
4210                 {
4211                   code = CODING_INHIBIT_CHARACTER_SUBSTITUTION;
4212                   charset = CHARSET_FROM_ID (charset_ascii);
4213                 }
4214               else
4215                 {
4216                   c = coding->default_char;
4217                   charset = char_charset (c, charset_list, &code);
4218                 }
4219             }
4220           if (code == CHARSET_INVALID_CODE (charset))
4221             abort ();
4222           if (charset == charset_big5)
4223             {
4224               int c1, c2;
4225
4226               c1 = code >> 8, c2 = code & 0xFF;
4227               EMIT_TWO_BYTES (c1, c2);
4228             }
4229           else
4230             EMIT_ONE_ASCII_BYTE (code & 0x7F);
4231         }
4232     }
4233   coding->result = CODING_RESULT_SUCCESS;
4234   coding->produced_char += produced_chars;
4235   coding->produced = dst - coding->destination;
4236   return 0;
4237 }
4238
4239 \f
4240 /*** 10. CCL handlers ***/
4241
4242 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
4243    Check if a text is encoded in a coding system of which
4244    encoder/decoder are written in CCL program.  If it is, return
4245    CATEGORY_MASK_CCL, else return 0.  */
4246
4247 static int
4248 detect_coding_ccl (coding, detect_info)
4249      struct coding_system *coding;
4250      struct coding_detection_info *detect_info;
4251 {
4252   unsigned char *src = coding->source, *src_base = src;
4253   unsigned char *src_end = coding->source + coding->src_bytes;
4254   int multibytep = coding->src_multibyte;
4255   int consumed_chars = 0;
4256   int found = 0;
4257   unsigned char *valids = CODING_CCL_VALIDS (coding);
4258   int head_ascii = coding->head_ascii;
4259   Lisp_Object attrs;
4260
4261   detect_info->checked |= CATEGORY_MASK_CCL;
4262
4263   coding = &coding_categories[coding_category_ccl];
4264   attrs = CODING_ID_ATTRS (coding->id);
4265   if (! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
4266     src += head_ascii;
4267
4268   while (1)
4269     {
4270       int c;
4271       ONE_MORE_BYTE (c);
4272       if (! valids[c])
4273         break;
4274       if ((valids[c] > 1))
4275         found = CATEGORY_MASK_CCL;
4276     }
4277   detect_info->rejected |= CATEGORY_MASK_CCL;
4278   return 0;
4279
4280  no_more_source:
4281   detect_info->found |= found;
4282   return 1;
4283 }
4284
4285 static void
4286 decode_coding_ccl (coding)
4287      struct coding_system *coding;
4288 {
4289   const unsigned char *src = coding->source + coding->consumed;
4290   unsigned char *src_end = coding->source + coding->src_bytes;
4291   int *charbuf = coding->charbuf;
4292   int *charbuf_end = charbuf + coding->charbuf_size;
4293   int consumed_chars = 0;
4294   int multibytep = coding->src_multibyte;
4295   struct ccl_program ccl;
4296   int source_charbuf[1024];
4297   int source_byteidx[1024];
4298   Lisp_Object attrs, eol_type, charset_list, valids;
4299
4300   CODING_GET_INFO (coding, attrs, eol_type, charset_list);
4301   setup_ccl_program (&ccl, CODING_CCL_DECODER (coding));
4302
4303   while (src < src_end)
4304     {
4305       const unsigned char *p = src;
4306       int *source, *source_end;
4307       int i = 0;
4308
4309       if (multibytep)
4310         while (i < 1024 && p < src_end)
4311           {
4312             source_byteidx[i] = p - src;
4313             source_charbuf[i++] = STRING_CHAR_ADVANCE (p);
4314           }
4315       else
4316         while (i < 1024 && p < src_end)
4317           source_charbuf[i++] = *p++;
4318
4319       if (p == src_end && coding->mode & CODING_MODE_LAST_BLOCK)
4320         ccl.last_block = 1;
4321
4322       source = source_charbuf;
4323       source_end = source + i;
4324       while (source < source_end)
4325         {
4326           ccl_driver (&ccl, source, charbuf,
4327                       source_end - source, charbuf_end - charbuf,
4328                       charset_list);
4329           source += ccl.consumed;
4330           charbuf += ccl.produced;
4331           if (ccl.status != CCL_STAT_SUSPEND_BY_DST)
4332             break;
4333         }
4334       if (source < source_end)
4335         src += source_byteidx[source - source_charbuf];
4336       else
4337         src = p;
4338       consumed_chars += source - source_charbuf;
4339
4340       if (ccl.status != CCL_STAT_SUSPEND_BY_SRC
4341           && ccl.status != CODING_RESULT_INSUFFICIENT_SRC)
4342         break;
4343     }
4344
4345   switch (ccl.status)
4346     {
4347     case CCL_STAT_SUSPEND_BY_SRC:
4348       coding->result = CODING_RESULT_INSUFFICIENT_SRC;
4349       break;
4350     case CCL_STAT_SUSPEND_BY_DST:
4351       break;
4352     case CCL_STAT_QUIT:
4353     case CCL_STAT_INVALID_CMD:
4354       coding->result = CODING_RESULT_INTERRUPT;
4355       break;
4356     default:
4357       coding->result = CODING_RESULT_SUCCESS;
4358       break;
4359     }
4360   coding->consumed_char += consumed_chars;
4361   coding->consumed = src - coding->source;
4362   coding->charbuf_used = charbuf - coding->charbuf;
4363 }
4364
4365 static int
4366 encode_coding_ccl (coding)
4367      struct coding_system *coding;
4368 {
4369   struct ccl_program ccl;
4370   int multibytep = coding->dst_multibyte;
4371   int *charbuf = coding->charbuf;
4372   int *charbuf_end = charbuf + coding->charbuf_used;
4373   unsigned char *dst = coding->destination + coding->produced;
4374   unsigned char *dst_end = coding->destination + coding->dst_bytes;
4375   unsigned char *adjusted_dst_end = dst_end - 1;
4376   int destination_charbuf[1024];
4377   int i, produced_chars = 0;
4378   Lisp_Object attrs, eol_type, charset_list;
4379
4380   CODING_GET_INFO (coding, attrs, eol_type, charset_list);
4381   setup_ccl_program (&ccl, CODING_CCL_ENCODER (coding));
4382
4383   ccl.last_block = coding->mode & CODING_MODE_LAST_BLOCK;
4384   ccl.dst_multibyte = coding->dst_multibyte;
4385
4386   while (charbuf < charbuf_end && dst < adjusted_dst_end)
4387     {
4388       int dst_bytes = dst_end - dst;
4389       if (dst_bytes > 1024)
4390         dst_bytes = 1024;
4391
4392       ccl_driver (&ccl, charbuf, destination_charbuf,
4393                   charbuf_end - charbuf, dst_bytes, charset_list);
4394       charbuf += ccl.consumed;
4395       if (multibytep)
4396         for (i = 0; i < ccl.produced; i++)
4397           EMIT_ONE_BYTE (destination_charbuf[i] & 0xFF);
4398       else
4399         {
4400           for (i = 0; i < ccl.produced; i++)
4401             *dst++ = destination_charbuf[i] & 0xFF;
4402           produced_chars += ccl.produced;
4403         }
4404     }
4405
4406   switch (ccl.status)
4407     {
4408     case CCL_STAT_SUSPEND_BY_SRC:
4409       coding->result = CODING_RESULT_INSUFFICIENT_SRC;
4410       break;
4411     case CCL_STAT_SUSPEND_BY_DST:
4412       coding->result = CODING_RESULT_INSUFFICIENT_DST;
4413       break;
4414     case CCL_STAT_QUIT:
4415     case CCL_STAT_INVALID_CMD:
4416       coding->result = CODING_RESULT_INTERRUPT;
4417       break;
4418     default:
4419       coding->result = CODING_RESULT_SUCCESS;
4420       break;
4421     }
4422
4423   coding->produced_char += produced_chars;
4424   coding->produced = dst - coding->destination;
4425   return 0;
4426 }
4427
4428
4429 \f
4430 /*** 10, 11. no-conversion handlers ***/
4431
4432 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".  */
4433
4434 static void
4435 decode_coding_raw_text (coding)
4436      struct coding_system *coding;
4437 {
4438   coding->chars_at_source = 1;
4439   coding->consumed_char = 0;
4440   coding->consumed = 0;
4441   coding->result = CODING_RESULT_SUCCESS;
4442 }
4443
4444 static int
4445 encode_coding_raw_text (coding)
4446      struct coding_system *coding;
4447 {
4448   int multibytep = coding->dst_multibyte;
4449   int *charbuf = coding->charbuf;
4450   int *charbuf_end = coding->charbuf + coding->charbuf_used;
4451   unsigned char *dst = coding->destination + coding->produced;
4452   unsigned char *dst_end = coding->destination + coding->dst_bytes;
4453   int produced_chars = 0;
4454   int c;
4455
4456   if (multibytep)
4457     {
4458       int safe_room = MAX_MULTIBYTE_LENGTH * 2;
4459
4460       if (coding->src_multibyte)
4461         while (charbuf < charbuf_end)
4462           {
4463             ASSURE_DESTINATION (safe_room);
4464             c = *charbuf++;
4465             if (ASCII_CHAR_P (c))
4466               EMIT_ONE_ASCII_BYTE (c);
4467             else if (CHAR_BYTE8_P (c))
4468               {
4469                 c = CHAR_TO_BYTE8 (c);
4470                 EMIT_ONE_BYTE (c);
4471               }
4472             else
4473               {
4474                 unsigned char str[MAX_MULTIBYTE_LENGTH], *p0 = str, *p1 = str;
4475
4476                 CHAR_STRING_ADVANCE (c, p1);
4477                 while (p0 < p1)
4478                   {
4479                     EMIT_ONE_BYTE (*p0);
4480                     p0++;
4481                   }
4482               }
4483           }
4484       else
4485         while (charbuf < charbuf_end)
4486           {
4487             ASSURE_DESTINATION (safe_room);
4488             c = *charbuf++;
4489             EMIT_ONE_BYTE (c);
4490           }
4491     }
4492   else
4493     {
4494       if (coding->src_multibyte)
4495         {
4496           int safe_room = MAX_MULTIBYTE_LENGTH;
4497
4498           while (charbuf < charbuf_end)
4499             {
4500               ASSURE_DESTINATION (safe_room);
4501               c = *charbuf++;
4502               if (ASCII_CHAR_P (c))
4503                 *dst++ = c;
4504               else if (CHAR_BYTE8_P (c))
4505                 *dst++ = CHAR_TO_BYTE8 (c);
4506               else
4507                 CHAR_STRING_ADVANCE (c, dst);
4508               produced_chars++;
4509             }
4510         }
4511       else
4512         {
4513           ASSURE_DESTINATION (charbuf_end - charbuf);
4514           while (charbuf < charbuf_end && dst < dst_end)
4515             *dst++ = *charbuf++;
4516           produced_chars = dst - (coding->destination + coding->dst_bytes);
4517         }
4518     }
4519   coding->result = CODING_RESULT_SUCCESS;
4520   coding->produced_char += produced_chars;
4521   coding->produced = dst - coding->destination;
4522   return 0;
4523 }
4524
4525 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
4526    Check if a text is encoded in a charset-based coding system.  If it
4527    is, return 1, else return 0.  */
4528
4529 static int
4530 detect_coding_charset (coding, detect_info)
4531      struct coding_system *coding;
4532      struct coding_detection_info *detect_info;
4533 {
4534   unsigned char *src = coding->source, *src_base = src;
4535   unsigned char *src_end = coding->source + coding->src_bytes;
4536   int multibytep = coding->src_multibyte;
4537   int consumed_chars = 0;
4538   Lisp_Object attrs, valids;
4539   int found = 0;
4540
4541   detect_info->checked |= CATEGORY_MASK_CHARSET;
4542
4543   coding = &coding_categories[coding_category_charset];
4544   attrs = CODING_ID_ATTRS (coding->id);
4545   valids = AREF (attrs, coding_attr_charset_valids);
4546
4547   if (! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
4548     src += coding->head_ascii;
4549
4550   while (1)
4551     {
4552       int c;
4553
4554       ONE_MORE_BYTE (c);
4555       if (NILP (AREF (valids, c)))
4556         break;
4557       if (c >= 0x80)
4558         found = CATEGORY_MASK_CHARSET;
4559     }
4560   detect_info->rejected |= CATEGORY_MASK_CHARSET;
4561   return 0;
4562
4563  no_more_source:
4564   detect_info->found |= found;
4565   return 1;
4566 }
4567
4568 static void
4569 decode_coding_charset (coding)
4570      struct coding_system *coding;
4571 {
4572   unsigned char *src = coding->source + coding->consumed;
4573   unsigned char *src_end = coding->source + coding->src_bytes;
4574   unsigned char *src_base;
4575   int *charbuf = coding->charbuf;
4576   int *charbuf_end = charbuf + coding->charbuf_size - MAX_ANNOTATION_LENGTH;
4577   int consumed_chars = 0, consumed_chars_base;
4578   int multibytep = coding->src_multibyte;
4579   Lisp_Object attrs, eol_type, charset_list, valids;
4580   int char_offset = coding->produced_char;
4581   int last_offset = char_offset;
4582   int last_id = charset_ascii;
4583
4584   CODING_GET_INFO (coding, attrs, eol_type, charset_list);
4585   valids = AREF (attrs, coding_attr_charset_valids);
4586
4587   while (1)
4588     {
4589       int c;
4590
4591       src_base = src;
4592       consumed_chars_base = consumed_chars;
4593
4594       if (charbuf >= charbuf_end)
4595         break;
4596
4597       ONE_MORE_BYTE (c);
4598       if (c == '\r')
4599         {
4600           /* Here we assume that no charset maps '\r' to something
4601              else.  */
4602           if (EQ (eol_type, Qdos))
4603             {
4604               if (src == src_end)
4605                 {
4606                   coding->result = CODING_RESULT_INSUFFICIENT_SRC;
4607                   goto no_more_source;
4608                 }
4609               if (*src == '\n')
4610                 ONE_MORE_BYTE (c);
4611             }
4612           else if (EQ (eol_type, Qmac))
4613             c = '\n';
4614         }
4615       else
4616         {
4617           Lisp_Object val;
4618           struct charset *charset;
4619           int dim;
4620           int len = 1;
4621           unsigned code = c;
4622
4623           val = AREF (valids, c);
4624           if (NILP (val))
4625             goto invalid_code;
4626           if (INTEGERP (val))
4627             {
4628               charset = CHARSET_FROM_ID (XFASTINT (val));
4629               dim = CHARSET_DIMENSION (charset);
4630               while (len < dim)
4631                 {
4632                   ONE_MORE_BYTE (c);
4633                   code = (code << 8) | c;
4634                   len++;
4635                 }
4636               CODING_DECODE_CHAR (coding, src, src_base, src_end,
4637                                   charset, code, c);
4638             }
4639           else
4640             {
4641               /* VAL is a list of charset IDs.  It is assured that the
4642                  list is sorted by charset dimensions (smaller one
4643                  comes first).  */
4644               while (CONSP (val))
4645                 {
4646                   charset = CHARSET_FROM_ID (XFASTINT (XCAR (val)));
4647                   dim = CHARSET_DIMENSION (charset);
4648                   while (len < dim)
4649                     {
4650                       ONE_MORE_BYTE (c);
4651                       code = (code << 8) | c;
4652                       len++;
4653                     }
4654                   CODING_DECODE_CHAR (coding, src, src_base,
4655                                       src_end, charset, code, c);
4656                   if (c >= 0)
4657                     break;
4658                   val = XCDR (val);
4659                 }
4660             }
4661           if (c < 0)
4662             goto invalid_code;
4663           if (charset->id != charset_ascii
4664               && last_id != charset->id)
4665             {
4666               if (last_id != charset_ascii)
4667                 ADD_CHARSET_DATA (charbuf, last_offset, char_offset, last_id);
4668               last_id = charset->id;
4669               last_offset = char_offset;
4670             }
4671         }
4672       *charbuf++ = c;
4673       char_offset++;
4674       continue;
4675
4676     invalid_code:
4677       src = src_base;
4678       consumed_chars = consumed_chars_base;
4679       ONE_MORE_BYTE (c);
4680       *charbuf++ = ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
4681       char_offset++;
4682       coding->errors++;
4683     }
4684
4685  no_more_source:
4686   if (last_id != charset_ascii)
4687     ADD_CHARSET_DATA (charbuf, last_offset, char_offset, last_id);
4688   coding->consumed_char += consumed_chars_base;
4689   coding->consumed = src_base - coding->source;
4690   coding->charbuf_used = charbuf - coding->charbuf;
4691 }
4692
4693 static int
4694 encode_coding_charset (coding)
4695      struct coding_system *coding;
4696 {
4697   int multibytep = coding->dst_multibyte;
4698   int *charbuf = coding->charbuf;
4699   int *charbuf_end = charbuf + coding->charbuf_used;
4700   unsigned char *dst = coding->destination + coding->produced;
4701   unsigned char *dst_end = coding->destination + coding->dst_bytes;
4702   int safe_room = MAX_MULTIBYTE_LENGTH;
4703   int produced_chars = 0;
4704   Lisp_Object attrs, eol_type, charset_list;
4705   int ascii_compatible;
4706   int c;
4707
4708   CODING_GET_INFO (coding, attrs, eol_type, charset_list);
4709   ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
4710
4711   while (charbuf < charbuf_end)
4712     {
4713       struct charset *charset;
4714       unsigned code;
4715
4716       ASSURE_DESTINATION (safe_room);
4717       c = *charbuf++;
4718       if (ascii_compatible && ASCII_CHAR_P (c))
4719         EMIT_ONE_ASCII_BYTE (c);
4720       else if (CHAR_BYTE8_P (c))
4721         {
4722           c = CHAR_TO_BYTE8 (c);
4723           EMIT_ONE_BYTE (c);
4724         }
4725       else
4726         {
4727           charset = char_charset (c, charset_list, &code);
4728           if (charset)
4729             {
4730               if (CHARSET_DIMENSION (charset) == 1)
4731                 EMIT_ONE_BYTE (code);
4732               else if (CHARSET_DIMENSION (charset) == 2)
4733                 EMIT_TWO_BYTES (code >> 8, code & 0xFF);
4734               else if (CHARSET_DIMENSION (charset) == 3)
4735                 EMIT_THREE_BYTES (code >> 16, (code >> 8) & 0xFF, code & 0xFF);
4736               else
4737                 EMIT_FOUR_BYTES (code >> 24, (code >> 16) & 0xFF,
4738                                  (code >> 8) & 0xFF, code & 0xFF);
4739             }
4740           else
4741             {
4742               if (coding->mode & CODING_MODE_SAFE_ENCODING)
4743                 c = CODING_INHIBIT_CHARACTER_SUBSTITUTION;
4744               else
4745                 c = coding->default_char;
4746               EMIT_ONE_BYTE (c);
4747             }
4748         }
4749     }
4750
4751   coding->result = CODING_RESULT_SUCCESS;
4752   coding->produced_char += produced_chars;
4753   coding->produced = dst - coding->destination;
4754   return 0;
4755 }
4756
4757 \f
4758 /*** 7. C library functions ***/
4759
4760 /* Setup coding context CODING from information about CODING_SYSTEM.
4761    If CODING_SYSTEM is nil, `no-conversion' is assumed.  If
4762    CODING_SYSTEM is invalid, signal an error.  */
4763
4764 void
4765 setup_coding_system (coding_system, coding)
4766      Lisp_Object coding_system;
4767      struct coding_system *coding;
4768 {
4769   Lisp_Object attrs;
4770   Lisp_Object eol_type;
4771   Lisp_Object coding_type;
4772   Lisp_Object val;
4773
4774   if (NILP (coding_system))
4775     coding_system = Qno_conversion;
4776
4777   CHECK_CODING_SYSTEM_GET_ID (coding_system, coding->id);
4778
4779   attrs = CODING_ID_ATTRS (coding->id);
4780   eol_type = CODING_ID_EOL_TYPE (coding->id);
4781
4782   coding->mode = 0;
4783   coding->head_ascii = -1;
4784   coding->common_flags
4785     = (VECTORP (eol_type) ? CODING_REQUIRE_DETECTION_MASK : 0);
4786
4787   val = CODING_ATTR_SAFE_CHARSETS (attrs);
4788   coding->max_charset_id = XSTRING (val)->size - 1;
4789   coding->safe_charsets = (char *) XSTRING (val)->data;
4790   coding->default_char = XINT (CODING_ATTR_DEFAULT_CHAR (attrs));
4791
4792   coding_type = CODING_ATTR_TYPE (attrs);
4793   if (EQ (coding_type, Qundecided))
4794     {
4795       coding->detector = NULL;
4796       coding->decoder = decode_coding_raw_text;
4797       coding->encoder = encode_coding_raw_text;
4798       coding->common_flags |= CODING_REQUIRE_DETECTION_MASK;
4799     }
4800   else if (EQ (coding_type, Qiso_2022))
4801     {
4802       int i;
4803       int flags = XINT (AREF (attrs, coding_attr_iso_flags));
4804       enum coding_category category =  XINT (CODING_ATTR_CATEGORY (attrs));
4805
4806       /* Invoke graphic register 0 to plane 0.  */
4807       CODING_ISO_INVOCATION (coding, 0) = 0;
4808       /* Invoke graphic register 1 to plane 1 if we can use 8-bit.  */
4809       CODING_ISO_INVOCATION (coding, 1)
4810         = (flags & CODING_ISO_FLAG_SEVEN_BITS ? -1 : 1);
4811       /* Setup the initial status of designation.  */
4812       for (i = 0; i < 4; i++)
4813         CODING_ISO_DESIGNATION (coding, i) = CODING_ISO_INITIAL (coding, i);
4814       /* Not single shifting initially.  */
4815       CODING_ISO_SINGLE_SHIFTING (coding) = 0;
4816       /* Beginning of buffer should also be regarded as bol. */
4817       CODING_ISO_BOL (coding) = 1;
4818       coding->detector = detect_coding_iso_2022;
4819       coding->decoder = decode_coding_iso_2022;
4820       coding->encoder = encode_coding_iso_2022;
4821       if (flags & CODING_ISO_FLAG_SAFE)
4822         coding->mode |= CODING_MODE_SAFE_ENCODING;
4823       coding->common_flags
4824         |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK
4825             | CODING_REQUIRE_FLUSHING_MASK);
4826       if (flags & CODING_ISO_FLAG_COMPOSITION)
4827         coding->common_flags |= CODING_ANNOTATE_COMPOSITION_MASK;
4828       if (flags & CODING_ISO_FLAG_DESIGNATION)
4829         coding->common_flags |= CODING_ANNOTATE_CHARSET_MASK;
4830       if (flags & CODING_ISO_FLAG_FULL_SUPPORT)
4831         {
4832           setup_iso_safe_charsets (attrs);
4833           val = CODING_ATTR_SAFE_CHARSETS (attrs);
4834           coding->max_charset_id = XSTRING (val)->size - 1;
4835           coding->safe_charsets = (char *) XSTRING (val)->data;
4836         }
4837       CODING_ISO_FLAGS (coding) = flags;
4838     }
4839   else if (EQ (coding_type, Qcharset))
4840     {
4841       coding->detector = detect_coding_charset;
4842       coding->decoder = decode_coding_charset;
4843       coding->encoder = encode_coding_charset;
4844       coding->common_flags
4845         |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
4846     }
4847   else if (EQ (coding_type, Qutf_8))
4848     {
4849       coding->detector = detect_coding_utf_8;
4850       coding->decoder = decode_coding_utf_8;
4851       coding->encoder = encode_coding_utf_8;
4852       coding->common_flags
4853         |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
4854     }
4855   else if (EQ (coding_type, Qutf_16))
4856     {
4857       val = AREF (attrs, coding_attr_utf_16_bom);
4858       CODING_UTF_16_BOM (coding) = (CONSP (val) ? utf_16_detect_bom
4859                                     : EQ (val, Qt) ? utf_16_with_bom
4860                                     : utf_16_without_bom);
4861       val = AREF (attrs, coding_attr_utf_16_endian);
4862       CODING_UTF_16_ENDIAN (coding) = (NILP (val) ? utf_16_big_endian
4863                                        : utf_16_little_endian);
4864       CODING_UTF_16_SURROGATE (coding) = 0;
4865       coding->detector = detect_coding_utf_16;
4866       coding->decoder = decode_coding_utf_16;
4867       coding->encoder = encode_coding_utf_16;
4868       coding->common_flags
4869         |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
4870     }
4871   else if (EQ (coding_type, Qccl))
4872     {
4873       coding->detector = detect_coding_ccl;
4874       coding->decoder = decode_coding_ccl;
4875       coding->encoder = encode_coding_ccl;
4876       coding->common_flags
4877         |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK
4878             | CODING_REQUIRE_FLUSHING_MASK);
4879     }
4880   else if (EQ (coding_type, Qemacs_mule))
4881     {
4882       coding->detector = detect_coding_emacs_mule;
4883       coding->decoder = decode_coding_emacs_mule;
4884       coding->encoder = encode_coding_emacs_mule;
4885       coding->common_flags
4886         |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
4887       if (! NILP (AREF (attrs, coding_attr_emacs_mule_full))
4888           && ! EQ (CODING_ATTR_CHARSET_LIST (attrs), Vemacs_mule_charset_list))
4889         {
4890           Lisp_Object tail, safe_charsets;
4891           int max_charset_id = 0;
4892
4893           for (tail = Vemacs_mule_charset_list; CONSP (tail);
4894                tail = XCDR (tail))
4895             if (max_charset_id < XFASTINT (XCAR (tail)))
4896               max_charset_id = XFASTINT (XCAR (tail));
4897           safe_charsets = Fmake_string (make_number (max_charset_id + 1),
4898                                         make_number (255));
4899           for (tail = Vemacs_mule_charset_list; CONSP (tail);
4900                tail = XCDR (tail))
4901             XSTRING (safe_charsets)->data[XFASTINT (XCAR (tail))] = 0;
4902           coding->max_charset_id = max_charset_id;
4903           coding->safe_charsets = (char *) XSTRING (safe_charsets)->data;
4904         }
4905     }
4906   else if (EQ (coding_type, Qshift_jis))
4907     {
4908       coding->detector = detect_coding_sjis;
4909       coding->decoder = decode_coding_sjis;
4910       coding->encoder = encode_coding_sjis;
4911       coding->common_flags
4912         |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
4913     }
4914   else if (EQ (coding_type, Qbig5))
4915     {
4916       coding->detector = detect_coding_big5;
4917       coding->decoder = decode_coding_big5;
4918       coding->encoder = encode_coding_big5;
4919       coding->common_flags
4920         |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
4921     }
4922   else                          /* EQ (coding_type, Qraw_text) */
4923     {
4924       coding->detector = NULL;
4925       coding->decoder = decode_coding_raw_text;
4926       coding->encoder = encode_coding_raw_text;
4927       coding->common_flags |= CODING_FOR_UNIBYTE_MASK;
4928     }
4929
4930   return;
4931 }
4932
4933 /* Return raw-text or one of its subsidiaries that has the same
4934    eol_type as CODING-SYSTEM.  */
4935
4936 Lisp_Object
4937 raw_text_coding_system (coding_system)
4938      Lisp_Object coding_system;
4939 {
4940   Lisp_Object spec, attrs;
4941   Lisp_Object eol_type, raw_text_eol_type;
4942
4943   spec = CODING_SYSTEM_SPEC (coding_system);
4944   attrs = AREF (spec, 0);
4945
4946   if (EQ (CODING_ATTR_TYPE (attrs), Qraw_text))
4947     return coding_system;
4948
4949   eol_type = AREF (spec, 2);
4950   if (VECTORP (eol_type))
4951     return Qraw_text;
4952   spec = CODING_SYSTEM_SPEC (Qraw_text);
4953   raw_text_eol_type = AREF (spec, 2);
4954   return (EQ (eol_type, Qunix) ? AREF (raw_text_eol_type, 0)
4955           : EQ (eol_type, Qdos) ? AREF (raw_text_eol_type, 1)
4956           : AREF (raw_text_eol_type, 2));
4957 }
4958
4959
4960 /* If CODING_SYSTEM doesn't specify end-of-line format but PARENT
4961    does, return one of the subsidiary that has the same eol-spec as
4962    PARENT.  Otherwise, return CODING_SYSTEM.  */
4963
4964 Lisp_Object
4965 coding_inherit_eol_type (coding_system, parent)
4966      Lisp_Object coding_system, parent;
4967 {
4968   Lisp_Object spec, attrs, eol_type;
4969
4970   spec = CODING_SYSTEM_SPEC (coding_system);
4971   attrs = AREF (spec, 0);
4972   eol_type = AREF (spec, 2);
4973   if (VECTORP (eol_type))
4974     {
4975       Lisp_Object parent_spec;
4976       Lisp_Object parent_eol_type;
4977
4978       parent_spec
4979         = CODING_SYSTEM_SPEC (buffer_defaults.buffer_file_coding_system);
4980       parent_eol_type = AREF (parent_spec, 2);
4981       if (EQ (parent_eol_type, Qunix))
4982         coding_system = AREF (eol_type, 0);
4983       else if (EQ (parent_eol_type, Qdos))
4984         coding_system = AREF (eol_type, 1);
4985       else if (EQ (parent_eol_type, Qmac))
4986         coding_system = AREF (eol_type, 2);
4987     }
4988   return coding_system;
4989 }
4990
4991 /* Emacs has a mechanism to automatically detect a coding system if it
4992    is one of Emacs' internal format, ISO2022, SJIS, and BIG5.  But,
4993    it's impossible to distinguish some coding systems accurately
4994    because they use the same range of codes.  So, at first, coding
4995    systems are categorized into 7, those are:
4996
4997    o coding-category-emacs-mule
4998
4999         The category for a coding system which has the same code range
5000         as Emacs' internal format.  Assigned the coding-system (Lisp
5001         symbol) `emacs-mule' by default.
5002
5003    o coding-category-sjis
5004
5005         The category for a coding system which has the same code range
5006         as SJIS.  Assigned the coding-system (Lisp
5007         symbol) `japanese-shift-jis' by default.
5008
5009    o coding-category-iso-7
5010
5011         The category for a coding system which has the same code range
5012         as ISO2022 of 7-bit environment.  This doesn't use any locking
5013         shift and single shift functions.  This can encode/decode all
5014         charsets.  Assigned the coding-system (Lisp symbol)
5015         `iso-2022-7bit' by default.
5016
5017    o coding-category-iso-7-tight
5018
5019         Same as coding-category-iso-7 except that this can
5020         encode/decode only the specified charsets.
5021
5022    o coding-category-iso-8-1
5023
5024         The category for a coding system which has the same code range
5025         as ISO2022 of 8-bit environment and graphic plane 1 used only
5026         for DIMENSION1 charset.  This doesn't use any locking shift
5027         and single shift functions.  Assigned the coding-system (Lisp
5028         symbol) `iso-latin-1' by default.
5029
5030    o coding-category-iso-8-2
5031
5032         The category for a coding system which has the same code range
5033         as ISO2022 of 8-bit environment and graphic plane 1 used only
5034         for DIMENSION2 charset.  This doesn't use any locking shift
5035         and single shift functions.  Assigned the coding-system (Lisp
5036         symbol) `japanese-iso-8bit' by default.
5037
5038    o coding-category-iso-7-else
5039
5040         The category for a coding system which has the same code range
5041         as ISO2022 of 7-bit environemnt but uses locking shift or
5042         single shift functions.  Assigned the coding-system (Lisp
5043         symbol) `iso-2022-7bit-lock' by default.
5044
5045    o coding-category-iso-8-else
5046
5047         The category for a coding system which has the same code range
5048         as ISO2022 of 8-bit environemnt but uses locking shift or
5049         single shift functions.  Assigned the coding-system (Lisp
5050         symbol) `iso-2022-8bit-ss2' by default.
5051
5052    o coding-category-big5
5053
5054         The category for a coding system which has the same code range
5055         as BIG5.  Assigned the coding-system (Lisp symbol)
5056         `cn-big5' by default.
5057
5058    o coding-category-utf-8
5059
5060         The category for a coding system which has the same code range
5061         as UTF-8 (cf. RFC2279).  Assigned the coding-system (Lisp
5062         symbol) `utf-8' by default.
5063
5064    o coding-category-utf-16-be
5065
5066         The category for a coding system in which a text has an
5067         Unicode signature (cf. Unicode Standard) in the order of BIG
5068         endian at the head.  Assigned the coding-system (Lisp symbol)
5069         `utf-16-be' by default.
5070
5071    o coding-category-utf-16-le
5072
5073         The category for a coding system in which a text has an
5074         Unicode signature (cf. Unicode Standard) in the order of
5075         LITTLE endian at the head.  Assigned the coding-system (Lisp
5076         symbol) `utf-16-le' by default.
5077
5078    o coding-category-ccl
5079
5080         The category for a coding system of which encoder/decoder is
5081         written in CCL programs.  The default value is nil, i.e., no
5082         coding system is assigned.
5083
5084    o coding-category-binary
5085
5086         The category for a coding system not categorized in any of the
5087         above.  Assigned the coding-system (Lisp symbol)
5088         `no-conversion' by default.
5089
5090    Each of them is a Lisp symbol and the value is an actual
5091    `coding-system's (this is also a Lisp symbol) assigned by a user.
5092    What Emacs does actually is to detect a category of coding system.
5093    Then, it uses a `coding-system' assigned to it.  If Emacs can't
5094    decide only one possible category, it selects a category of the
5095    highest priority.  Priorities of categories are also specified by a
5096    user in a Lisp variable `coding-category-list'.
5097
5098 */
5099
5100 #define EOL_SEEN_NONE   0
5101 #define EOL_SEEN_LF     1
5102 #define EOL_SEEN_CR     2
5103 #define EOL_SEEN_CRLF   4
5104
5105 /* Detect how end-of-line of a text of length SRC_BYTES pointed by
5106    SOURCE is encoded.  If CATEGORY is one of
5107    coding_category_utf_16_XXXX, assume that CR and LF are encoded by
5108    two-byte, else they are encoded by one-byte.
5109
5110    Return one of EOL_SEEN_XXX.  */
5111
5112 #define MAX_EOL_CHECK_COUNT 3
5113
5114 static int
5115 detect_eol (source, src_bytes, category)
5116      unsigned char *source;
5117      EMACS_INT src_bytes;
5118      enum coding_category category;
5119 {
5120   unsigned char *src = source, *src_end = src + src_bytes;
5121   unsigned char c;
5122   int total  = 0;
5123   int eol_seen = EOL_SEEN_NONE;
5124
5125   if ((1 << category) & CATEGORY_MASK_UTF_16)
5126     {
5127       int msb, lsb;
5128
5129       msb = category == (coding_category_utf_16_le
5130                          | coding_category_utf_16_le_nosig);
5131       lsb = 1 - msb;
5132
5133       while (src + 1 < src_end)
5134         {
5135           c = src[lsb];
5136           if (src[msb] == 0 && (c == '\n' || c == '\r'))
5137             {
5138               int this_eol;
5139
5140               if (c == '\n')
5141                 this_eol = EOL_SEEN_LF;
5142               else if (src + 3 >= src_end
5143                        || src[msb + 2] != 0
5144                        || src[lsb + 2] != '\n')
5145                 this_eol = EOL_SEEN_CR;
5146               else
5147                 this_eol = EOL_SEEN_CRLF;
5148
5149               if (eol_seen == EOL_SEEN_NONE)
5150                 /* This is the first end-of-line.  */
5151                 eol_seen = this_eol;
5152               else if (eol_seen != this_eol)
5153                 {
5154                   /* The found type is different from what found before.  */
5155                   eol_seen = EOL_SEEN_LF;
5156                   break;
5157                 }
5158               if (++total == MAX_EOL_CHECK_COUNT)
5159                 break;
5160             }
5161           src += 2;
5162         }
5163     }
5164   else
5165     {
5166       while (src < src_end)
5167         {
5168           c = *src++;
5169           if (c == '\n' || c == '\r')
5170             {
5171               int this_eol;
5172
5173               if (c == '\n')
5174                 this_eol = EOL_SEEN_LF;
5175               else if (src >= src_end || *src != '\n')
5176                 this_eol = EOL_SEEN_CR;
5177               else
5178                 this_eol = EOL_SEEN_CRLF, src++;
5179
5180               if (eol_seen == EOL_SEEN_NONE)
5181                 /* This is the first end-of-line.  */
5182                 eol_seen = this_eol;
5183               else if (eol_seen != this_eol)
5184                 {
5185                   /* The found type is different from what found before.  */
5186                   eol_seen = EOL_SEEN_LF;
5187                   break;
5188                 }
5189               if (++total == MAX_EOL_CHECK_COUNT)
5190                 break;
5191             }
5192         }
5193     }
5194   return eol_seen;
5195 }
5196
5197
5198 static void
5199 adjust_coding_eol_type (coding, eol_seen)
5200      struct coding_system *coding;
5201      int eol_seen;
5202 {
5203   Lisp_Object eol_type;
5204
5205   eol_type = CODING_ID_EOL_TYPE (coding->id);
5206   if (eol_seen & EOL_SEEN_LF)
5207     coding->id = CODING_SYSTEM_ID (AREF (eol_type, 0));
5208   else if (eol_seen & EOL_SEEN_CRLF)
5209     coding->id = CODING_SYSTEM_ID (AREF (eol_type, 1));
5210   else if (eol_seen & EOL_SEEN_CR)
5211     coding->id = CODING_SYSTEM_ID (AREF (eol_type, 2));
5212 }
5213
5214 /* Detect how a text specified in CODING is encoded.  If a coding
5215    system is detected, update fields of CODING by the detected coding
5216    system.  */
5217
5218 void
5219 detect_coding (coding)
5220      struct coding_system *coding;
5221 {
5222   unsigned char *src, *src_end;
5223   Lisp_Object attrs, coding_type;
5224
5225   coding->consumed = coding->consumed_char = 0;
5226   coding->produced = coding->produced_char = 0;
5227   coding_set_source (coding);
5228
5229   src_end = coding->source + coding->src_bytes;
5230
5231   /* If we have not yet decided the text encoding type, detect it
5232      now.  */
5233   if (EQ (CODING_ATTR_TYPE (CODING_ID_ATTRS (coding->id)), Qundecided))
5234     {
5235       int c, i;
5236
5237       for (src = coding->source; src < src_end; src++)
5238         {
5239           c = *src;
5240           if (c & 0x80 || (c < 0x20 && (c == ISO_CODE_ESC
5241                                         || c == ISO_CODE_SI
5242                                         || c == ISO_CODE_SO)))
5243             break;
5244         }
5245       coding->head_ascii = src - (coding->source + coding->consumed);
5246
5247       if (coding->head_ascii < coding->src_bytes)
5248         {
5249           struct coding_detection_info detect_info;
5250           enum coding_category category;
5251           struct coding_system *this;
5252
5253           detect_info.checked = detect_info.found = detect_info.rejected = 0;
5254           for (i = 0; i < coding_category_raw_text; i++)
5255             {
5256               category = coding_priorities[i];
5257               this = coding_categories + category;
5258               if (this->id < 0)
5259                 {
5260                   /* No coding system of this category is defined.  */
5261                   detect_info.rejected |= (1 << category);
5262                 }
5263               else if (category >= coding_category_raw_text)
5264                 continue;
5265               else if (detect_info.checked & (1 << category))
5266                 {
5267                   if (detect_info.found & (1 << category))
5268                     break;
5269                 }
5270               else if ((*(this->detector)) (coding, &detect_info)
5271                        && detect_info.found & (1 << category))
5272                 break;
5273             }
5274           if (i < coding_category_raw_text)
5275             setup_coding_system (CODING_ID_NAME (this->id), coding);
5276           else if (detect_info.rejected == CATEGORY_MASK_ANY)
5277             setup_coding_system (Qraw_text, coding);
5278           else if (detect_info.rejected)
5279             for (i = 0; i < coding_category_raw_text; i++)
5280               if (! (detect_info.rejected & (1 << coding_priorities[i])))
5281                 {
5282                   this = coding_categories + coding_priorities[i];
5283                   setup_coding_system (CODING_ID_NAME (this->id), coding);
5284                   break;
5285                 }
5286         }
5287     }
5288
5289   attrs = CODING_ID_ATTRS (coding->id);
5290   coding_type = CODING_ATTR_TYPE (attrs);
5291
5292   /* If we have not yet decided the EOL type, detect it now.  But, the
5293      detection is impossible for a CCL based coding system, in which
5294      case, we detct the EOL type after decoding.  */
5295   if (VECTORP (CODING_ID_EOL_TYPE (coding->id))
5296       && ! EQ (coding_type, Qccl))
5297     {
5298       int eol_seen = detect_eol (coding->source, coding->src_bytes,
5299                                  XINT (CODING_ATTR_CATEGORY (attrs)));
5300
5301       if (eol_seen != EOL_SEEN_NONE)
5302         adjust_coding_eol_type (coding, eol_seen);
5303     }
5304 }
5305
5306
5307 static void
5308 decode_eol (coding)
5309      struct coding_system *coding;
5310 {
5311   if (VECTORP (CODING_ID_EOL_TYPE (coding->id)))
5312     {
5313       unsigned char *p = CHAR_POS_ADDR (coding->dst_pos);
5314       unsigned char *pend = p + coding->produced;
5315       int eol_seen = EOL_SEEN_NONE;
5316
5317       for (; p < pend; p++)
5318         {
5319           if (*p == '\n')
5320             eol_seen |= EOL_SEEN_LF;
5321           else if (*p == '\r')
5322             {
5323               if (p + 1 < pend && *(p + 1) == '\n')
5324                 {
5325                   eol_seen |= EOL_SEEN_CRLF;
5326                   p++;
5327                 }
5328               else
5329                 eol_seen |= EOL_SEEN_CR;
5330             }
5331         }
5332       if (eol_seen != EOL_SEEN_NONE)
5333         adjust_coding_eol_type (coding, eol_seen);
5334     }
5335
5336   if (EQ (CODING_ID_EOL_TYPE (coding->id), Qmac))
5337     {
5338       unsigned char *p = CHAR_POS_ADDR (coding->dst_pos);
5339       unsigned char *pend = p + coding->produced;
5340
5341       for (; p < pend; p++)
5342         if (*p == '\r')
5343           *p = '\n';
5344     }
5345   else if (EQ (CODING_ID_EOL_TYPE (coding->id), Qdos))
5346     {
5347       unsigned char *p, *pbeg, *pend;
5348       Lisp_Object undo_list;
5349
5350       move_gap_both (coding->dst_pos + coding->produced_char,
5351                      coding->dst_pos_byte + coding->produced);
5352       undo_list = current_buffer->undo_list;
5353       current_buffer->undo_list = Qt;
5354       del_range_2 (coding->dst_pos, coding->dst_pos_byte, GPT, GPT_BYTE, 0);
5355       current_buffer->undo_list = undo_list;
5356       pbeg = GPT_ADDR;
5357       pend = pbeg + coding->produced;
5358
5359       for (p = pend - 1; p >= pbeg; p--)
5360         if (*p == '\r')
5361           {
5362             safe_bcopy ((char *) (p + 1), (char *) p, pend - p - 1);
5363             pend--;
5364           }
5365       coding->produced_char -= coding->produced - (pend - pbeg);
5366       coding->produced = pend - pbeg;
5367       insert_from_gap (coding->produced_char, coding->produced);
5368     }
5369 }
5370
5371 static void
5372 translate_chars (coding, table)
5373      struct coding_system *coding;
5374      Lisp_Object table;
5375 {
5376   int *charbuf = coding->charbuf;
5377   int *charbuf_end = charbuf + coding->charbuf_used;
5378   int c;
5379
5380   if (coding->chars_at_source)
5381     return;
5382
5383   while (charbuf < charbuf_end)
5384     {
5385       c = *charbuf;
5386       if (c < 0)
5387         charbuf += c;
5388       else
5389         *charbuf++ = translate_char (table, c);
5390     }
5391 }
5392
5393 static int
5394 produce_chars (coding)
5395      struct coding_system *coding;
5396 {
5397   unsigned char *dst = coding->destination + coding->produced;
5398   unsigned char *dst_end = coding->destination + coding->dst_bytes;
5399   int produced;
5400   int produced_chars = 0;
5401
5402   if (! coding->chars_at_source)
5403     {
5404       /* Characters are in coding->charbuf.  */
5405       int *buf = coding->charbuf;
5406       int *buf_end = buf + coding->charbuf_used;
5407       unsigned char *adjusted_dst_end;
5408
5409       if (BUFFERP (coding->src_object)
5410           && EQ (coding->src_object, coding->dst_object))
5411         dst_end = coding->source + coding->consumed;
5412       adjusted_dst_end = dst_end - MAX_MULTIBYTE_LENGTH;
5413
5414       while (buf < buf_end)
5415         {
5416           int c = *buf++;
5417
5418           if (dst >= adjusted_dst_end)
5419             {
5420               dst = alloc_destination (coding,
5421                                        buf_end - buf + MAX_MULTIBYTE_LENGTH,
5422                                        dst);
5423               dst_end = coding->destination + coding->dst_bytes;
5424               adjusted_dst_end = dst_end - MAX_MULTIBYTE_LENGTH;
5425             }
5426           if (c >= 0)
5427             {
5428               if (coding->dst_multibyte
5429                   || ! CHAR_BYTE8_P (c))
5430                 CHAR_STRING_ADVANCE (c, dst);
5431               else
5432                 *dst++ = CHAR_TO_BYTE8 (c);
5433               produced_chars++;
5434             }
5435           else
5436             /* This is an annotation datum.  */
5437             buf -= c + 1;
5438         }
5439     }
5440   else
5441     {
5442       unsigned char *src = coding->source;
5443       unsigned char *src_end = src + coding->src_bytes;
5444       Lisp_Object eol_type;
5445
5446       eol_type = CODING_ID_EOL_TYPE (coding->id);
5447
5448       if (coding->src_multibyte != coding->dst_multibyte)
5449         {
5450           if (coding->src_multibyte)
5451             {
5452               int multibytep = 1;
5453               int consumed_chars;
5454
5455               while (1)
5456                 {
5457                   unsigned char *src_base = src;
5458                   int c;
5459
5460                   ONE_MORE_BYTE (c);
5461                   if (c == '\r')
5462                     {
5463                       if (EQ (eol_type, Qdos))
5464                         {
5465                           if (src == src_end)
5466                             {
5467                               coding->result = CODING_RESULT_INSUFFICIENT_SRC;
5468                               goto no_more_source;
5469                             }
5470                           if (*src == '\n')
5471                             c = *src++;
5472                         }
5473                       else if (EQ (eol_type, Qmac))
5474                         c = '\n';
5475                     }
5476                   if (dst == dst_end)
5477                     {
5478                       coding->consumed = src - coding->source;
5479
5480                     if (EQ (coding->src_object, coding->dst_object))
5481                       dst_end = src;
5482                     if (dst == dst_end)
5483                       {
5484                         dst = alloc_destination (coding, src_end - src + 1,
5485                                                  dst);
5486                         dst_end = coding->destination + coding->dst_bytes;
5487                         coding_set_source (coding);
5488                         src = coding->source + coding->consumed;
5489                         src_end = coding->source + coding->src_bytes;
5490                       }
5491                     }
5492                   *dst++ = c;
5493                   produced_chars++;
5494                 }
5495             no_more_source:
5496               ;
5497             }
5498           else
5499             while (src < src_end)
5500               {
5501                 int multibytep = 1;
5502                 int c = *src++;
5503
5504                 if (c == '\r')
5505                   {
5506                     if (EQ (eol_type, Qdos))
5507                       {
5508                         if (src < src_end
5509                             && *src == '\n')
5510                           c = *src++;
5511                       }
5512                     else if (EQ (eol_type, Qmac))
5513                       c = '\n';
5514                   }
5515                 if (dst >= dst_end - 1)
5516                   {
5517                     coding->consumed = src - coding->source;
5518
5519                     if (EQ (coding->src_object, coding->dst_object))
5520                       dst_end = src;
5521                     if (dst >= dst_end - 1)
5522                       {
5523                         dst = alloc_destination (coding, src_end - src + 2,
5524                                                  dst);
5525                         dst_end = coding->destination + coding->dst_bytes;
5526                         coding_set_source (coding);
5527                         src = coding->source + coding->consumed;
5528                         src_end = coding->source + coding->src_bytes;
5529                       }
5530                   }
5531                 EMIT_ONE_BYTE (c);
5532               }
5533         }
5534       else
5535         {
5536           if (!EQ (coding->src_object, coding->dst_object))
5537             {
5538               int require = coding->src_bytes - coding->dst_bytes;
5539
5540               if (require > 0)
5541                 {
5542                   EMACS_INT offset = src - coding->source;
5543
5544                   dst = alloc_destination (coding, require, dst);
5545                   coding_set_source (coding);
5546                   src = coding->source + offset;
5547                   src_end = coding->source + coding->src_bytes;
5548                 }
5549             }
5550           produced_chars = coding->src_chars;
5551           while (src < src_end)
5552             {
5553               int c = *src++;
5554
5555               if (c == '\r')
5556                 {
5557                   if (EQ (eol_type, Qdos))
5558                     {
5559                       if (src < src_end
5560                           && *src == '\n')
5561                         c = *src++;
5562                       produced_chars--;
5563                     }
5564                   else if (EQ (eol_type, Qmac))
5565                     c = '\n';
5566                 }
5567               *dst++ = c;
5568             }
5569         }
5570       coding->consumed = coding->src_bytes;
5571       coding->consumed_char = coding->src_chars;
5572     }
5573
5574   produced = dst - (coding->destination + coding->produced);
5575   if (BUFFERP (coding->dst_object))
5576     insert_from_gap (produced_chars, produced);
5577   coding->produced += produced;
5578   coding->produced_char += produced_chars;
5579   return produced_chars;
5580 }
5581
5582 /* Compose text in CODING->object according to the annotation data at
5583    CHARBUF.  CHARBUF is an array:
5584      [ -LENGTH ANNOTATION_MASK FROM TO METHOD COMP_LEN [ COMPONENTS... ] ]
5585  */
5586
5587 static INLINE void
5588 produce_composition (coding, charbuf)
5589      struct coding_system *coding;
5590      int *charbuf;
5591 {
5592   int len;
5593   EMACS_INT from, to;
5594   enum composition_method method;
5595   Lisp_Object components;
5596
5597   len = -charbuf[0];
5598   from = coding->dst_pos + charbuf[2];
5599   to = coding->dst_pos + charbuf[3];
5600   method = (enum composition_method) (charbuf[4]);
5601
5602   if (method == COMPOSITION_RELATIVE)
5603     components = Qnil;
5604   else
5605     {
5606       Lisp_Object args[MAX_COMPOSITION_COMPONENTS * 2 - 1];
5607       int i;
5608
5609       len -= 5;
5610       charbuf += 5;
5611       for (i = 0; i < len; i++)
5612         args[i] = make_number (charbuf[i]);
5613       components = (method == COMPOSITION_WITH_ALTCHARS
5614                     ? Fstring (len, args) : Fvector (len, args));
5615     }
5616   compose_text (from, to, components, Qnil, coding->dst_object);
5617 }
5618
5619
5620 /* Put `charset' property on text in CODING->object according to
5621    the annotation data at CHARBUF.  CHARBUF is an array:
5622      [ -LENGTH ANNOTATION_MASK FROM TO CHARSET-ID ]
5623  */
5624
5625 static INLINE void
5626 produce_charset (coding, charbuf)
5627      struct coding_system *coding;
5628      int *charbuf;
5629 {
5630   EMACS_INT from = coding->dst_pos + charbuf[2];
5631   EMACS_INT to = coding->dst_pos + charbuf[3];
5632   struct charset *charset = CHARSET_FROM_ID (charbuf[4]);
5633
5634   Fput_text_property (make_number (from), make_number (to),
5635                       Qcharset, CHARSET_NAME (charset),
5636                       coding->dst_object);
5637 }
5638
5639
5640 #define CHARBUF_SIZE 0x4000
5641
5642 #define ALLOC_CONVERSION_WORK_AREA(coding)                              \
5643   do {                                                                  \
5644     int size = CHARBUF_SIZE;;                                           \
5645                                                                         \
5646     coding->charbuf = NULL;                                             \
5647     while (size > 1024)                                                 \
5648       {                                                                 \
5649         coding->charbuf = (int *) alloca (sizeof (int) * size);         \
5650         if (coding->charbuf)                                            \
5651           break;                                                        \
5652         size >>= 1;                                                     \
5653       }                                                                 \
5654     if (! coding->charbuf)                                              \
5655       {                                                                 \
5656         coding->result = CODING_RESULT_INSUFFICIENT_MEM;                \
5657         return coding->result;                                          \
5658       }                                                                 \
5659     coding->charbuf_size = size;                                        \
5660   } while (0)
5661
5662
5663 static void
5664 produce_annotation (coding)
5665      struct coding_system *coding;
5666 {
5667   int *charbuf = coding->charbuf;
5668   int *charbuf_end = charbuf + coding->charbuf_used;
5669
5670   if (NILP (coding->dst_object))
5671     return;
5672
5673   while (charbuf < charbuf_end)
5674     {
5675       if (*charbuf >= 0)
5676         charbuf++;
5677       else
5678         {
5679           int len = -*charbuf;
5680           switch (charbuf[1])
5681             {
5682             case CODING_ANNOTATE_COMPOSITION_MASK:
5683               produce_composition (coding, charbuf);
5684               break;
5685             case CODING_ANNOTATE_CHARSET_MASK:
5686               produce_charset (coding, charbuf);
5687               break;
5688             default:
5689               abort ();
5690             }
5691           charbuf += len;
5692         }
5693     }
5694 }
5695
5696 /* Decode the data at CODING->src_object into CODING->dst_object.
5697    CODING->src_object is a buffer, a string, or nil.
5698    CODING->dst_object is a buffer.
5699
5700    If CODING->src_object is a buffer, it must be the current buffer.
5701    In this case, if CODING->src_pos is positive, it is a position of
5702    the source text in the buffer, otherwise, the source text is in the
5703    gap area of the buffer, and CODING->src_pos specifies the offset of
5704    the text from GPT (which must be the same as PT).  If this is the
5705    same buffer as CODING->dst_object, CODING->src_pos must be
5706    negative.
5707
5708    If CODING->src_object is a string, CODING->src_pos in an index to
5709    that string.
5710
5711    If CODING->src_object is nil, CODING->source must already point to
5712    the non-relocatable memory area.  In this case, CODING->src_pos is
5713    an offset from CODING->source.
5714
5715    The decoded data is inserted at the current point of the buffer
5716    CODING->dst_object.
5717 */
5718
5719 static int
5720 decode_coding (coding)
5721      struct coding_system *coding;
5722 {
5723   Lisp_Object attrs;
5724
5725   if (BUFFERP (coding->src_object)
5726       && coding->src_pos > 0
5727       && coding->src_pos < GPT
5728       && coding->src_pos + coding->src_chars > GPT)
5729     move_gap_both (coding->src_pos, coding->src_pos_byte);
5730
5731   if (BUFFERP (coding->dst_object))
5732     {
5733       if (current_buffer != XBUFFER (coding->dst_object))
5734         set_buffer_internal (XBUFFER (coding->dst_object));
5735       if (GPT != PT)
5736         move_gap_both (PT, PT_BYTE);
5737     }
5738
5739   coding->consumed = coding->consumed_char = 0;
5740   coding->produced = coding->produced_char = 0;
5741   coding->chars_at_source = 0;
5742   coding->result = CODING_RESULT_SUCCESS;
5743   coding->errors = 0;
5744
5745   ALLOC_CONVERSION_WORK_AREA (coding);
5746
5747   attrs = CODING_ID_ATTRS (coding->id);
5748
5749   do
5750     {
5751       coding_set_source (coding);
5752       coding->annotated = 0;
5753       (*(coding->decoder)) (coding);
5754       if (!NILP (CODING_ATTR_DECODE_TBL (attrs)))
5755         translate_chars (coding, CODING_ATTR_DECODE_TBL (attrs));
5756       else if (!NILP (Vstandard_translation_table_for_decode))
5757         translate_chars (coding, Vstandard_translation_table_for_decode);
5758       coding_set_destination (coding);
5759       produce_chars (coding);
5760       if (coding->annotated)
5761         produce_annotation (coding);
5762     }
5763   while (coding->consumed < coding->src_bytes
5764          && ! coding->result);
5765
5766   if (EQ (CODING_ATTR_TYPE (CODING_ID_ATTRS (coding->id)), Qccl)
5767       && SYMBOLP (CODING_ID_EOL_TYPE (coding->id))
5768       && ! EQ (CODING_ID_EOL_TYPE (coding->id), Qunix))
5769     decode_eol (coding);
5770
5771   coding->carryover_bytes = 0;
5772   if (coding->consumed < coding->src_bytes)
5773     {
5774       int nbytes = coding->src_bytes - coding->consumed;
5775       unsigned char *src;
5776
5777       coding_set_source (coding);
5778       coding_set_destination (coding);
5779       src = coding->source + coding->consumed;
5780
5781       if (coding->mode & CODING_MODE_LAST_BLOCK)
5782         {
5783           /* Flush out unprocessed data as binary chars.  We are sure
5784              that the number of data is less than the size of
5785              coding->charbuf.  */
5786           while (nbytes-- > 0)
5787             {
5788               int c = *src++;
5789
5790               coding->charbuf[coding->charbuf_used++] = (c & 0x80 ? - c : c);
5791             }
5792           produce_chars (coding);
5793         }
5794       else
5795         {
5796           /* Record unprocessed bytes in coding->carryover.  We are
5797              sure that the number of data is less than the size of
5798              coding->carryover.  */
5799           unsigned char *p = coding->carryover;
5800
5801           coding->carryover_bytes = nbytes;
5802           while (nbytes-- > 0)
5803             *p++ = *src++;
5804         }
5805       coding->consumed = coding->src_bytes;
5806     }
5807
5808   return coding->result;
5809 }
5810
5811
5812 /* Extract an annotation datum from a composition starting at POS and
5813    ending before LIMIT of CODING->src_object (buffer or string), store
5814    the data in BUF, set *STOP to a starting position of the next
5815    composition (if any) or to LIMIT, and return the address of the
5816    next element of BUF.
5817
5818    If such an annotation is not found, set *STOP to a starting
5819    position of a composition after POS (if any) or to LIMIT, and
5820    return BUF.  */
5821
5822 static INLINE int *
5823 handle_composition_annotation (pos, limit, coding, buf, stop)
5824      EMACS_INT pos, limit;
5825      struct coding_system *coding;
5826      int *buf;
5827      EMACS_INT *stop;
5828 {
5829   EMACS_INT start, end;
5830   Lisp_Object prop;
5831
5832   if (! find_composition (pos, limit, &start, &end, &prop, coding->src_object)
5833       || end > limit)
5834     *stop = limit;
5835   else if (start > pos)
5836     *stop = start;
5837   else
5838     {
5839       if (start == pos)
5840         {
5841           /* We found a composition.  Store the corresponding
5842              annotation data in BUF.  */
5843           int *head = buf;
5844           enum composition_method method = COMPOSITION_METHOD (prop);
5845           int nchars = COMPOSITION_LENGTH (prop);
5846
5847           ADD_COMPOSITION_DATA (buf, 0, nchars, method);
5848           if (method != COMPOSITION_RELATIVE)
5849             {
5850               Lisp_Object components;
5851               int len, i, i_byte;
5852
5853               components = COMPOSITION_COMPONENTS (prop);
5854               if (VECTORP (components))
5855                 {
5856                   len = XVECTOR (components)->size;
5857                   for (i = 0; i < len; i++)
5858                     *buf++ = XINT (AREF (components, i));
5859                 }
5860               else if (STRINGP (components))
5861                 {
5862                   len = XSTRING (components)->size;
5863                   i = i_byte = 0;
5864                   while (i < len)
5865                     {
5866                       FETCH_STRING_CHAR_ADVANCE (*buf, components, i, i_byte);
5867                       buf++;
5868                     }
5869                 }
5870               else if (INTEGERP (components))
5871                 {
5872                   len = 1;
5873                   *buf++ = XINT (components);
5874                 }
5875               else if (CONSP (components))
5876                 {
5877                   for (len = 0; CONSP (components);
5878                        len++, components = XCDR (components))
5879                     *buf++ = XINT (XCAR (components));
5880                 }
5881               else
5882                 abort ();
5883               *head -= len;
5884             }
5885         }
5886
5887       if (find_composition (end, limit, &start, &end, &prop,
5888                             coding->src_object)
5889           && end <= limit)
5890         *stop = start;
5891       else
5892         *stop = limit;
5893     }
5894   return buf;
5895 }
5896
5897
5898 /* Extract an annotation datum from a text property `charset' at POS of
5899    CODING->src_object (buffer of string), store the data in BUF, set
5900    *STOP to the position where the value of `charset' property changes
5901    (limiting by LIMIT), and return the address of the next element of
5902    BUF.
5903
5904    If the property value is nil, set *STOP to the position where the
5905    property value is non-nil (limiting by LIMIT), and return BUF.  */
5906
5907 static INLINE int *
5908 handle_charset_annotation (pos, limit, coding, buf, stop)
5909      EMACS_INT pos, limit;
5910      struct coding_system *coding;
5911      int *buf;
5912      EMACS_INT *stop;
5913 {
5914   Lisp_Object val, next;
5915   int id;
5916
5917   val = Fget_text_property (make_number (pos), Qcharset, coding->src_object);
5918   if (! NILP (val) && CHARSETP (val))
5919     id = XINT (CHARSET_SYMBOL_ID (val));
5920   else
5921     id = -1;
5922   ADD_CHARSET_DATA (buf, 0, 0, id);
5923   next = Fnext_single_property_change (make_number (pos), Qcharset,
5924                                        coding->src_object,
5925                                        make_number (limit));
5926   *stop = XINT (next);
5927   return buf;
5928 }
5929
5930
5931 static void
5932 consume_chars (coding)
5933      struct coding_system *coding;
5934 {
5935   int *buf = coding->charbuf;
5936   int *buf_end = coding->charbuf + coding->charbuf_size;
5937   const unsigned char *src = coding->source + coding->consumed;
5938   EMACS_INT pos = coding->src_pos + coding->consumed_char;
5939   EMACS_INT end_pos = coding->src_pos + coding->src_chars;
5940   int multibytep = coding->src_multibyte;
5941   Lisp_Object eol_type;
5942   int c;
5943   EMACS_INT stop, stop_composition, stop_charset;
5944   int id;
5945
5946   eol_type = CODING_ID_EOL_TYPE (coding->id);
5947   if (VECTORP (eol_type))
5948     eol_type = Qunix;
5949
5950   /* Note: composition handling is not yet implemented.  */
5951   coding->common_flags &= ~CODING_ANNOTATE_COMPOSITION_MASK;
5952
5953   if (coding->common_flags & CODING_ANNOTATE_COMPOSITION_MASK)
5954     stop = stop_composition = pos;
5955   else
5956     stop = stop_composition = end_pos;
5957   if (coding->common_flags & CODING_ANNOTATE_CHARSET_MASK)
5958     stop = stop_charset = pos;
5959   else
5960     stop_charset = end_pos;
5961
5962   /* Compensate for CRLF and annotation.  */
5963   buf_end -= 1 + MAX_ANNOTATION_LENGTH;
5964   while (buf < buf_end)
5965     {
5966       if (pos == stop)
5967         {
5968           int *p;
5969
5970           if (pos == end_pos)
5971             break;
5972           if (pos == stop_composition)
5973             buf = handle_composition_annotation (pos, end_pos, coding,
5974                                                  buf, &stop_composition);
5975           if (pos == stop_charset)
5976             buf = handle_charset_annotation (pos, end_pos, coding,
5977                                              buf, &stop_charset);
5978           stop = (stop_composition < stop_charset
5979                   ? stop_composition : stop_charset);
5980         }
5981
5982       if (! multibytep)
5983         c = *src++;
5984       else
5985         c = STRING_CHAR_ADVANCE (src);
5986       if ((c == '\r') && (coding->mode & CODING_MODE_SELECTIVE_DISPLAY))
5987         c = '\n';
5988       if (! EQ (eol_type, Qunix))
5989         {
5990           if (c == '\n')
5991             {
5992               if (EQ (eol_type, Qdos))
5993                 *buf++ = '\r';
5994               else
5995                 c = '\r';
5996             }
5997         }
5998       *buf++ = c;
5999       pos++;
6000     }
6001
6002   coding->consumed = src - coding->source;
6003   coding->consumed_char = pos - coding->src_pos;
6004   coding->charbuf_used = buf - coding->charbuf;
6005   coding->chars_at_source = 0;
6006 }
6007
6008
6009 /* Encode the text at CODING->src_object into CODING->dst_object.
6010    CODING->src_object is a buffer or a string.
6011    CODING->dst_object is a buffer or nil.
6012
6013    If CODING->src_object is a buffer, it must be the current buffer.
6014    In this case, if CODING->src_pos is positive, it is a position of
6015    the source text in the buffer, otherwise. the source text is in the
6016    gap area of the buffer, and coding->src_pos specifies the offset of
6017    the text from GPT (which must be the same as PT).  If this is the
6018    same buffer as CODING->dst_object, CODING->src_pos must be
6019    negative and CODING should not have `pre-write-conversion'.
6020
6021    If CODING->src_object is a string, CODING should not have
6022    `pre-write-conversion'.
6023
6024    If CODING->dst_object is a buffer, the encoded data is inserted at
6025    the current point of that buffer.
6026
6027    If CODING->dst_object is nil, the encoded data is placed at the
6028    memory area specified by CODING->destination.  */
6029
6030 static int
6031 encode_coding (coding)
6032      struct coding_system *coding;
6033 {
6034   Lisp_Object attrs;
6035
6036   attrs = CODING_ID_ATTRS (coding->id);
6037
6038   if (BUFFERP (coding->dst_object))
6039     {
6040       set_buffer_internal (XBUFFER (coding->dst_object));
6041       coding->dst_multibyte
6042         = ! NILP (current_buffer->enable_multibyte_characters);
6043     }
6044
6045   coding->consumed = coding->consumed_char = 0;
6046   coding->produced = coding->produced_char = 0;
6047   coding->result = CODING_RESULT_SUCCESS;
6048   coding->errors = 0;
6049
6050   ALLOC_CONVERSION_WORK_AREA (coding);
6051
6052   do {
6053     coding_set_source (coding);
6054     consume_chars (coding);
6055
6056     if (!NILP (CODING_ATTR_ENCODE_TBL (attrs)))
6057       translate_chars (coding, CODING_ATTR_ENCODE_TBL (attrs));
6058     else if (!NILP (Vstandard_translation_table_for_encode))
6059       translate_chars (coding, Vstandard_translation_table_for_encode);
6060
6061     coding_set_destination (coding);
6062     (*(coding->encoder)) (coding);
6063   } while (coding->consumed_char < coding->src_chars);
6064
6065   if (BUFFERP (coding->dst_object))
6066     insert_from_gap (coding->produced_char, coding->produced);
6067
6068   return (coding->result);
6069 }
6070
6071 /* Work buffer */
6072
6073 /* List of currently used working buffer.  */
6074 Lisp_Object Vcode_conversion_work_buf_list;
6075
6076 /* A working buffer used by the top level conversion.  */
6077 Lisp_Object Vcode_conversion_reused_work_buf;
6078
6079
6080 /* Return a working buffer that can be freely used by the following
6081    code conversion.  MULTIBYTEP specifies the multibyteness of the
6082    buffer.  */
6083
6084 Lisp_Object
6085 make_conversion_work_buffer (multibytep)
6086      int multibytep;
6087 {
6088   struct buffer *current = current_buffer;
6089   Lisp_Object buf;
6090
6091   if (NILP (Vcode_conversion_work_buf_list))
6092     {
6093       if (NILP (Vcode_conversion_reused_work_buf))
6094         Vcode_conversion_reused_work_buf
6095           = Fget_buffer_create (build_string (" *code-conversion-work*"));
6096       Vcode_conversion_work_buf_list
6097         = Fcons (Vcode_conversion_reused_work_buf, Qnil);
6098     }
6099   else
6100     {
6101       int depth = XINT (Flength (Vcode_conversion_work_buf_list));
6102       char str[128];
6103
6104       sprintf (str, " *code-conversion-work*<%d>", depth);
6105       Vcode_conversion_work_buf_list
6106         = Fcons (Fget_buffer_create (build_string (str)),
6107                  Vcode_conversion_work_buf_list);
6108     }
6109
6110   buf = XCAR (Vcode_conversion_work_buf_list);
6111   set_buffer_internal (XBUFFER (buf));
6112   current_buffer->undo_list = Qt;
6113   Ferase_buffer ();
6114   Fset_buffer_multibyte (multibytep ? Qt : Qnil, Qnil);
6115   set_buffer_internal (current);
6116   return buf;
6117 }
6118
6119 static struct coding_system *saved_coding;
6120
6121 Lisp_Object
6122 code_conversion_restore (info)
6123      Lisp_Object info;
6124 {
6125   int depth = XINT (Flength (Vcode_conversion_work_buf_list));
6126   Lisp_Object buf;
6127
6128   if (depth > 0)
6129     {
6130       buf = XCAR (Vcode_conversion_work_buf_list);
6131       Vcode_conversion_work_buf_list = XCDR (Vcode_conversion_work_buf_list);
6132       if (depth > 1 && !NILP (Fbuffer_live_p (buf)))
6133         Fkill_buffer (buf);
6134     }
6135
6136   if (EQ (saved_coding->dst_object, Qt)
6137       && saved_coding->destination)
6138     xfree (saved_coding->destination);
6139
6140   return save_excursion_restore (info);
6141 }
6142
6143
6144 int
6145 decode_coding_gap (coding, chars, bytes)
6146      struct coding_system *coding;
6147      EMACS_INT chars, bytes;
6148 {
6149   int count = specpdl_ptr - specpdl;
6150
6151   saved_coding = coding;
6152   record_unwind_protect (code_conversion_restore, save_excursion_save ());
6153
6154   coding->src_object = Fcurrent_buffer ();
6155   coding->src_chars = chars;
6156   coding->src_bytes = bytes;
6157   coding->src_pos = -chars;
6158   coding->src_pos_byte = -bytes;
6159   coding->src_multibyte = chars < bytes;
6160   coding->dst_object = coding->src_object;
6161   coding->dst_pos = PT;
6162   coding->dst_pos_byte = PT_BYTE;
6163   coding->dst_multibyte = ! NILP (current_buffer->enable_multibyte_characters);
6164   coding->mode |= CODING_MODE_LAST_BLOCK;
6165
6166   if (CODING_REQUIRE_DETECTION (coding))
6167     detect_coding (coding);
6168
6169   decode_coding (coding);
6170
6171   unbind_to (count, Qnil);
6172   return coding->result;
6173 }
6174
6175 int
6176 encode_coding_gap (coding, chars, bytes)
6177      struct coding_system *coding;
6178      EMACS_INT chars, bytes;
6179 {
6180   int count = specpdl_ptr - specpdl;
6181   Lisp_Object buffer;
6182
6183   saved_coding = coding;
6184   record_unwind_protect (code_conversion_restore, save_excursion_save ());
6185
6186   buffer = Fcurrent_buffer ();
6187   coding->src_object = buffer;
6188   coding->src_chars = chars;
6189   coding->src_bytes = bytes;
6190   coding->src_pos = -chars;
6191   coding->src_pos_byte = -bytes;
6192   coding->src_multibyte = chars < bytes;
6193   coding->dst_object = coding->src_object;
6194   coding->dst_pos = PT;
6195   coding->dst_pos_byte = PT_BYTE;
6196
6197   encode_coding (coding);
6198
6199   unbind_to (count, Qnil);
6200   return coding->result;
6201 }
6202
6203
6204 /* Decode the text in the range FROM/FROM_BYTE and TO/TO_BYTE in
6205    SRC_OBJECT into DST_OBJECT by coding context CODING.
6206
6207    SRC_OBJECT is a buffer, a string, or Qnil.
6208
6209    If it is a buffer, the text is at point of the buffer.  FROM and TO
6210    are positions in the buffer.
6211
6212    If it is a string, the text is at the beginning of the string.
6213    FROM and TO are indices to the string.
6214
6215    If it is nil, the text is at coding->source.  FROM and TO are
6216    indices to coding->source.
6217
6218    DST_OBJECT is a buffer, Qt, or Qnil.
6219
6220    If it is a buffer, the decoded text is inserted at point of the
6221    buffer.  If the buffer is the same as SRC_OBJECT, the source text
6222    is deleted.
6223
6224    If it is Qt, a string is made from the decoded text, and
6225    set in CODING->dst_object.
6226
6227    If it is Qnil, the decoded text is stored at CODING->destination.
6228    The caller must allocate CODING->dst_bytes bytes at
6229    CODING->destination by xmalloc.  If the decoded text is longer than
6230    CODING->dst_bytes, CODING->destination is relocated by xrealloc.
6231  */
6232
6233 void
6234 decode_coding_object (coding, src_object, from, from_byte, to, to_byte,
6235                       dst_object)
6236      struct coding_system *coding;
6237      Lisp_Object src_object;
6238      EMACS_INT from, from_byte, to, to_byte;
6239      Lisp_Object dst_object;
6240 {
6241   int count = specpdl_ptr - specpdl;
6242   unsigned char *destination;
6243   EMACS_INT dst_bytes;
6244   EMACS_INT chars = to - from;
6245   EMACS_INT bytes = to_byte - from_byte;
6246   Lisp_Object attrs;
6247
6248   saved_coding = coding;
6249   record_unwind_protect (code_conversion_restore, save_excursion_save ());
6250
6251   if (NILP (dst_object))
6252     {
6253       destination = coding->destination;
6254       dst_bytes = coding->dst_bytes;
6255     }
6256
6257   coding->src_object = src_object;
6258   coding->src_chars = chars;
6259   coding->src_bytes = bytes;
6260   coding->src_multibyte = chars < bytes;
6261
6262   if (STRINGP (src_object))
6263     {
6264       coding->src_pos = from;
6265       coding->src_pos_byte = from_byte;
6266     }
6267   else if (BUFFERP (src_object))
6268     {
6269       set_buffer_internal (XBUFFER (src_object));
6270       if (from != GPT)
6271         move_gap_both (from, from_byte);
6272       if (EQ (src_object, dst_object))
6273         {
6274           TEMP_SET_PT_BOTH (from, from_byte);
6275           del_range_both (from, from_byte, to, to_byte, 1);
6276           coding->src_pos = -chars;
6277           coding->src_pos_byte = -bytes;
6278         }
6279       else
6280         {
6281           coding->src_pos = from;
6282           coding->src_pos_byte = from_byte;
6283         }
6284     }
6285
6286   if (CODING_REQUIRE_DETECTION (coding))
6287     detect_coding (coding);
6288   attrs = CODING_ID_ATTRS (coding->id);
6289
6290   if (EQ (dst_object, Qt)
6291       || (! NILP (CODING_ATTR_POST_READ (attrs))
6292           && NILP (dst_object)))
6293     {
6294       coding->dst_object = make_conversion_work_buffer (1);
6295       coding->dst_pos = BEG;
6296       coding->dst_pos_byte = BEG_BYTE;
6297       coding->dst_multibyte = 1;
6298     }
6299   else if (BUFFERP (dst_object))
6300     {
6301       coding->dst_object = dst_object;
6302       coding->dst_pos = BUF_PT (XBUFFER (dst_object));
6303       coding->dst_pos_byte = BUF_PT_BYTE (XBUFFER (dst_object));
6304       coding->dst_multibyte
6305         = ! NILP (XBUFFER (dst_object)->enable_multibyte_characters);
6306     }
6307   else
6308     {
6309       coding->dst_object = Qnil;
6310       coding->dst_multibyte = 1;
6311     }
6312
6313   decode_coding (coding);
6314
6315   if (BUFFERP (coding->dst_object))
6316     set_buffer_internal (XBUFFER (coding->dst_object));
6317
6318   if (! NILP (CODING_ATTR_POST_READ (attrs)))
6319     {
6320       struct gcpro gcpro1, gcpro2;
6321       EMACS_INT prev_Z = Z, prev_Z_BYTE = Z_BYTE;
6322       Lisp_Object val;
6323
6324       TEMP_SET_PT_BOTH (coding->dst_pos, coding->dst_pos_byte);
6325       GCPRO2 (coding->src_object, coding->dst_object);
6326       val = call1 (CODING_ATTR_POST_READ (attrs),
6327                    make_number (coding->produced_char));
6328       UNGCPRO;
6329       CHECK_NATNUM (val);
6330       coding->produced_char += Z - prev_Z;
6331       coding->produced += Z_BYTE - prev_Z_BYTE;
6332     }
6333
6334   if (EQ (dst_object, Qt))
6335     {
6336       coding->dst_object = Fbuffer_string ();
6337     }
6338   else if (NILP (dst_object) && BUFFERP (coding->dst_object))
6339     {
6340       set_buffer_internal (XBUFFER (coding->dst_object));
6341       if (dst_bytes < coding->produced)
6342         {
6343           destination
6344             = (unsigned char *) xrealloc (destination, coding->produced);
6345           if (! destination)
6346             {
6347               coding->result = CODING_RESULT_INSUFFICIENT_DST;
6348               unbind_to (count, Qnil);
6349               return;
6350             }
6351           if (BEGV < GPT && GPT < BEGV + coding->produced_char)
6352             move_gap_both (BEGV, BEGV_BYTE);
6353           bcopy (BEGV_ADDR, destination, coding->produced);
6354           coding->destination = destination;
6355         }
6356     }
6357
6358   unbind_to (count, Qnil);
6359 }
6360
6361
6362 void
6363 encode_coding_object (coding, src_object, from, from_byte, to, to_byte,
6364                       dst_object)
6365      struct coding_system *coding;
6366      Lisp_Object src_object;
6367      EMACS_INT from, from_byte, to, to_byte;
6368      Lisp_Object dst_object;
6369 {
6370   int count = specpdl_ptr - specpdl;
6371   EMACS_INT chars = to - from;
6372   EMACS_INT bytes = to_byte - from_byte;
6373   Lisp_Object attrs;
6374
6375   saved_coding = coding;
6376   record_unwind_protect (code_conversion_restore, save_excursion_save ());
6377
6378   coding->src_object = src_object;
6379   coding->src_chars = chars;
6380   coding->src_bytes = bytes;
6381   coding->src_multibyte = chars < bytes;
6382
6383   attrs = CODING_ID_ATTRS (coding->id);
6384
6385   if (! NILP (CODING_ATTR_PRE_WRITE (attrs)))
6386     {
6387       coding->src_object = make_conversion_work_buffer (coding->src_multibyte);
6388       set_buffer_internal (XBUFFER (coding->src_object));
6389       if (STRINGP (src_object))
6390         insert_from_string (src_object, from, from_byte, chars, bytes, 0);
6391       else if (BUFFERP (src_object))
6392         insert_from_buffer (XBUFFER (src_object), from, chars, 0);
6393       else
6394         insert_1_both (coding->source + from, chars, bytes, 0, 0, 0);
6395
6396       if (EQ (src_object, dst_object))
6397         {
6398           set_buffer_internal (XBUFFER (src_object));
6399           del_range_both (from, from_byte, to, to_byte, 1);
6400           set_buffer_internal (XBUFFER (coding->src_object));
6401         }
6402
6403       call2 (CODING_ATTR_PRE_WRITE (attrs),
6404              make_number (BEG), make_number (Z));
6405       coding->src_object = Fcurrent_buffer ();
6406       if (BEG != GPT)
6407         move_gap_both (BEG, BEG_BYTE);
6408       coding->src_chars = Z - BEG;
6409       coding->src_bytes = Z_BYTE - BEG_BYTE;
6410       coding->src_pos = BEG;
6411       coding->src_pos_byte = BEG_BYTE;
6412       coding->src_multibyte = Z < Z_BYTE;
6413     }
6414   else if (STRINGP (src_object))
6415     {
6416       coding->src_pos = from;
6417       coding->src_pos_byte = from_byte;
6418     }
6419   else if (BUFFERP (src_object))
6420     {
6421       set_buffer_internal (XBUFFER (src_object));
6422       if (EQ (src_object, dst_object))
6423         {
6424           coding->src_object = del_range_1 (from, to, 1, 1);
6425           coding->src_pos = 0;
6426           coding->src_pos_byte = 0;
6427         }
6428       else
6429         {
6430           if (from < GPT && to >= GPT)
6431             move_gap_both (from, from_byte);
6432           coding->src_pos = from;
6433           coding->src_pos_byte = from_byte;
6434         }
6435     }
6436
6437   if (BUFFERP (dst_object))
6438     {
6439       coding->dst_object = dst_object;
6440       if (EQ (src_object, dst_object))
6441         {
6442           coding->dst_pos = from;
6443           coding->dst_pos_byte = from_byte;
6444         }
6445       else
6446         {
6447           coding->dst_pos = BUF_PT (XBUFFER (dst_object));
6448           coding->dst_pos_byte = BUF_PT_BYTE (XBUFFER (dst_object));
6449         }
6450       coding->dst_multibyte
6451         = ! NILP (XBUFFER (dst_object)->enable_multibyte_characters);
6452     }
6453   else if (EQ (dst_object, Qt))
6454     {
6455       coding->dst_object = Qnil;
6456       coding->dst_bytes = coding->src_chars;
6457       if (coding->dst_bytes == 0)
6458         coding->dst_bytes = 1;
6459       coding->destination = (unsigned char *) xmalloc (coding->dst_bytes);
6460       coding->dst_multibyte = 0;
6461     }
6462   else
6463     {
6464       coding->dst_object = Qnil;
6465       coding->dst_multibyte = 0;
6466     }
6467
6468   encode_coding (coding);
6469
6470   if (EQ (dst_object, Qt))
6471     {
6472       if (BUFFERP (coding->dst_object))
6473         coding->dst_object = Fbuffer_string ();
6474       else
6475         {
6476           coding->dst_object
6477             = make_unibyte_string ((char *) coding->destination,
6478                                    coding->produced);
6479           xfree (coding->destination);
6480         }
6481     }
6482
6483   unbind_to (count, Qnil);
6484 }
6485
6486
6487 Lisp_Object
6488 preferred_coding_system ()
6489 {
6490   int id = coding_categories[coding_priorities[0]].id;
6491
6492   return CODING_ID_NAME (id);
6493 }
6494
6495 \f
6496 #ifdef emacs
6497 /*** 8. Emacs Lisp library functions ***/
6498
6499 DEFUN ("coding-system-p", Fcoding_system_p, Scoding_system_p, 1, 1, 0,
6500        doc: /* Return t if OBJECT is nil or a coding-system.
6501 See the documentation of `define-coding-system' for information
6502 about coding-system objects.  */)
6503      (obj)
6504      Lisp_Object obj;
6505 {
6506   return ((NILP (obj) || CODING_SYSTEM_P (obj)) ? Qt : Qnil);
6507 }
6508
6509 DEFUN ("read-non-nil-coding-system", Fread_non_nil_coding_system,
6510        Sread_non_nil_coding_system, 1, 1, 0,
6511        doc: /* Read a coding system from the minibuffer, prompting with string PROMPT.  */)
6512      (prompt)
6513      Lisp_Object prompt;
6514 {
6515   Lisp_Object val;
6516   do
6517     {
6518       val = Fcompleting_read (prompt, Vcoding_system_alist, Qnil,
6519                               Qt, Qnil, Qcoding_system_history, Qnil, Qnil);
6520     }
6521   while (XSTRING (val)->size == 0);
6522   return (Fintern (val, Qnil));
6523 }
6524
6525 DEFUN ("read-coding-system", Fread_coding_system, Sread_coding_system, 1, 2, 0,
6526        doc: /* Read a coding system from the minibuffer, prompting with string PROMPT.
6527 If the user enters null input, return second argument DEFAULT-CODING-SYSTEM.  */)
6528      (prompt, default_coding_system)
6529      Lisp_Object prompt, default_coding_system;
6530 {
6531   Lisp_Object val;
6532   if (SYMBOLP (default_coding_system))
6533     XSETSTRING (default_coding_system, XSYMBOL (default_coding_system)->name);
6534   val = Fcompleting_read (prompt, Vcoding_system_alist, Qnil,
6535                           Qt, Qnil, Qcoding_system_history,
6536                           default_coding_system, Qnil);
6537   return (XSTRING (val)->size == 0 ? Qnil : Fintern (val, Qnil));
6538 }
6539
6540 DEFUN ("check-coding-system", Fcheck_coding_system, Scheck_coding_system,
6541        1, 1, 0,
6542        doc: /* Check validity of CODING-SYSTEM.
6543 If valid, return CODING-SYSTEM, else signal a `coding-system-error' error.  */)
6544   (coding_system)
6545      Lisp_Object coding_system;
6546 {
6547   CHECK_SYMBOL (coding_system);
6548   if (!NILP (Fcoding_system_p (coding_system)))
6549     return coding_system;
6550   while (1)
6551     Fsignal (Qcoding_system_error, Fcons (coding_system, Qnil));
6552 }
6553
6554 \f
6555 /* Detect how the bytes at SRC of length SRC_BYTES are encoded.  If
6556    HIGHEST is nonzero, return the coding system of the highest
6557    priority among the detected coding systems.  Otherwize return a
6558    list of detected coding systems sorted by their priorities.  If
6559    MULTIBYTEP is nonzero, it is assumed that the bytes are in correct
6560    multibyte form but contains only ASCII and eight-bit chars.
6561    Otherwise, the bytes are raw bytes.
6562
6563    CODING-SYSTEM controls the detection as below:
6564
6565    If it is nil, detect both text-format and eol-format.  If the
6566    text-format part of CODING-SYSTEM is already specified
6567    (e.g. `iso-latin-1'), detect only eol-format.  If the eol-format
6568    part of CODING-SYSTEM is already specified (e.g. `undecided-unix'),
6569    detect only text-format.  */
6570
6571 Lisp_Object
6572 detect_coding_system (src, src_bytes, highest, multibytep, coding_system)
6573      unsigned char *src;
6574      int src_bytes, highest;
6575      int multibytep;
6576      Lisp_Object coding_system;
6577 {
6578   unsigned char *src_end = src + src_bytes;
6579   int mask = CATEGORY_MASK_ANY;
6580   Lisp_Object attrs, eol_type;
6581   Lisp_Object val;
6582   struct coding_system coding;
6583   int id;
6584   struct coding_detection_info detect_info;
6585
6586   if (NILP (coding_system))
6587     coding_system = Qundecided;
6588   setup_coding_system (coding_system, &coding);
6589   attrs = CODING_ID_ATTRS (coding.id);
6590   eol_type = CODING_ID_EOL_TYPE (coding.id);
6591   coding_system = CODING_ATTR_BASE_NAME (attrs);
6592
6593   coding.source = src;
6594   coding.src_bytes = src_bytes;
6595   coding.src_multibyte = multibytep;
6596   coding.consumed = 0;
6597   coding.mode |= CODING_MODE_LAST_BLOCK;
6598
6599   detect_info.checked = detect_info.found = detect_info.rejected = 0;
6600
6601   /* At first, detect text-format if necessary.  */
6602   if (XINT (CODING_ATTR_CATEGORY (attrs)) == coding_category_undecided)
6603     {
6604       enum coding_category category;
6605       struct coding_system *this;
6606       int c, i;
6607
6608       for (; src < src_end; src++)
6609         {
6610           c = *src;
6611           if (c & 0x80
6612               || (c < 0x20 && (c == ISO_CODE_ESC
6613                                || c == ISO_CODE_SI
6614                                || c == ISO_CODE_SO)))
6615             break;
6616         }
6617       coding.head_ascii = src - coding.source;
6618
6619       if (src < src_end)
6620         for (i = 0; i < coding_category_raw_text; i++)
6621           {
6622             category = coding_priorities[i];
6623             this = coding_categories + category;
6624
6625             if (this->id < 0)
6626               {
6627                 /* No coding system of this category is defined.  */
6628                 detect_info.rejected |= (1 << category);
6629               }
6630             else if (category >= coding_category_raw_text)
6631               continue;
6632             else if (detect_info.checked & (1 << category))
6633               {
6634                 if (highest
6635                     && (detect_info.found & (1 << category)))
6636                   break;
6637               }
6638             else
6639               {
6640                 if ((*(this->detector)) (&coding, &detect_info)
6641                     && highest
6642                     && (detect_info.found & (1 << category)))
6643                   break;
6644               }
6645           }
6646
6647
6648       if (detect_info.rejected == CATEGORY_MASK_ANY)
6649         {
6650           detect_info.found = CATEGORY_MASK_RAW_TEXT;
6651           id = coding_categories[coding_category_raw_text].id;
6652           val = Fcons (make_number (id), Qnil);
6653         }
6654       else if (! detect_info.rejected && ! detect_info.found)
6655         {
6656           detect_info.found = CATEGORY_MASK_ANY;
6657           id = coding_categories[coding_category_undecided].id;
6658           val = Fcons (make_number (id), Qnil);
6659         }
6660       else if (highest)
6661         {
6662           if (detect_info.found)
6663             {
6664               detect_info.found = 1 << category;
6665               val = Fcons (make_number (this->id), Qnil);
6666             }
6667           else
6668             for (i = 0; i < coding_category_raw_text; i++)
6669               if (! (detect_info.rejected & (1 << coding_priorities[i])))
6670                 {
6671                   detect_info.found = 1 << coding_priorities[i];
6672                   id = coding_categories[coding_priorities[i]].id;
6673                   val = Fcons (make_number (id), Qnil);
6674                   break;
6675                 }
6676         }
6677       else
6678         {
6679           int mask = detect_info.rejected | detect_info.found;
6680           int found = 0;
6681           val = Qnil;
6682
6683           for (i = coding_category_raw_text - 1; i >= 0; i--)
6684             {
6685               category = coding_priorities[i];
6686               if (! (mask & (1 << category)))
6687                 {
6688                   found |= 1 << category;
6689                   id = coding_categories[category].id;
6690                   val = Fcons (make_number (id), val);
6691                 }
6692             }
6693           for (i = coding_category_raw_text - 1; i >= 0; i--)
6694             {
6695               category = coding_priorities[i];
6696               if (detect_info.found & (1 << category))
6697                 {
6698                   id = coding_categories[category].id;
6699                   val = Fcons (make_number (id), val);
6700                 }
6701             }
6702           detect_info.found |= found;
6703         }
6704     }
6705   else
6706     {
6707       detect_info.found = 1 << XINT (CODING_ATTR_CATEGORY (attrs));
6708       val = Fcons (make_number (coding.id), Qnil);
6709     }
6710
6711   /* Then, detect eol-format if necessary.  */
6712   {
6713     int normal_eol = -1, utf_16_be_eol = -1, utf_16_le_eol;
6714     Lisp_Object tail;
6715
6716     if (VECTORP (eol_type))
6717       {
6718         if (detect_info.found & ~CATEGORY_MASK_UTF_16)
6719           normal_eol = detect_eol (coding.source, src_bytes,
6720                                    coding_category_raw_text);
6721         if (detect_info.found & (CATEGORY_MASK_UTF_16_BE
6722                                  | CATEGORY_MASK_UTF_16_BE_NOSIG))
6723           utf_16_be_eol = detect_eol (coding.source, src_bytes,
6724                                       coding_category_utf_16_be);
6725         if (detect_info.found & (CATEGORY_MASK_UTF_16_LE
6726                                  | CATEGORY_MASK_UTF_16_LE_NOSIG))
6727           utf_16_le_eol = detect_eol (coding.source, src_bytes,
6728                                       coding_category_utf_16_le);
6729       }
6730     else
6731       {
6732         if (EQ (eol_type, Qunix))
6733           normal_eol = utf_16_be_eol = utf_16_le_eol = EOL_SEEN_LF;
6734         else if (EQ (eol_type, Qdos))
6735           normal_eol = utf_16_be_eol = utf_16_le_eol = EOL_SEEN_CRLF;
6736         else
6737           normal_eol = utf_16_be_eol = utf_16_le_eol = EOL_SEEN_CR;
6738       }
6739
6740     for (tail = val; CONSP (tail); tail = XCDR (tail))
6741       {
6742         enum coding_category category;
6743         int this_eol;
6744
6745         id = XINT (XCAR (tail));
6746         attrs = CODING_ID_ATTRS (id);
6747         category = XINT (CODING_ATTR_CATEGORY (attrs));
6748         eol_type = CODING_ID_EOL_TYPE (id);
6749         if (VECTORP (eol_type))
6750           {
6751             if (category == coding_category_utf_16_be
6752                 || category == coding_category_utf_16_be_nosig)
6753               this_eol = utf_16_be_eol;
6754             else if (category == coding_category_utf_16_le
6755                      || category == coding_category_utf_16_le_nosig)
6756               this_eol = utf_16_le_eol;
6757             else
6758               this_eol = normal_eol;
6759
6760             if (this_eol == EOL_SEEN_LF)
6761               XSETCAR (tail, AREF (eol_type, 0));
6762             else if (this_eol == EOL_SEEN_CRLF)
6763               XSETCAR (tail, AREF (eol_type, 1));
6764             else if (this_eol == EOL_SEEN_CR)
6765               XSETCAR (tail, AREF (eol_type, 2));
6766             else
6767               XSETCAR (tail, CODING_ID_NAME (id));
6768           }
6769         else
6770           XSETCAR (tail, CODING_ID_NAME (id));
6771       }
6772   }
6773
6774   return (highest ? XCAR (val) : val);
6775 }
6776
6777
6778 DEFUN ("detect-coding-region", Fdetect_coding_region, Sdetect_coding_region,
6779        2, 3, 0,
6780        doc: /* Detect coding system of the text in the region between START and END.
6781 Return a list of possible coding systems ordered by priority.
6782
6783 If only ASCII characters are found, it returns a list of single element
6784 `undecided' or its subsidiary coding system according to a detected
6785 end-of-line format.
6786
6787 If optional argument HIGHEST is non-nil, return the coding system of
6788 highest priority.  */)
6789      (start, end, highest)
6790      Lisp_Object start, end, highest;
6791 {
6792   int from, to;
6793   int from_byte, to_byte;
6794
6795   CHECK_NUMBER_COERCE_MARKER (start);
6796   CHECK_NUMBER_COERCE_MARKER (end);
6797
6798   validate_region (&start, &end);
6799   from = XINT (start), to = XINT (end);
6800   from_byte = CHAR_TO_BYTE (from);
6801   to_byte = CHAR_TO_BYTE (to);
6802
6803   if (from < GPT && to >= GPT)
6804     move_gap_both (to, to_byte);
6805
6806   return detect_coding_system (BYTE_POS_ADDR (from_byte),
6807                                to_byte - from_byte,
6808                                !NILP (highest),
6809                                !NILP (current_buffer
6810                                       ->enable_multibyte_characters),
6811                                Qnil);
6812 }
6813
6814 DEFUN ("detect-coding-string", Fdetect_coding_string, Sdetect_coding_string,
6815        1, 2, 0,
6816        doc: /* Detect coding system of the text in STRING.
6817 Return a list of possible coding systems ordered by priority.
6818
6819 If only ASCII characters are found, it returns a list of single element
6820 `undecided' or its subsidiary coding system according to a detected
6821 end-of-line format.
6822
6823 If optional argument HIGHEST is non-nil, return the coding system of
6824 highest priority.  */)
6825      (string, highest)
6826      Lisp_Object string, highest;
6827 {
6828   CHECK_STRING (string);
6829
6830   return detect_coding_system (XSTRING (string)->data,
6831                                STRING_BYTES (XSTRING (string)),
6832                                !NILP (highest),
6833                                STRING_MULTIBYTE (string),
6834                                Qnil);
6835 }
6836
6837
6838 static INLINE int
6839 char_encodable_p (c, attrs)
6840      int c;
6841      Lisp_Object attrs;
6842 {
6843   Lisp_Object tail;
6844   struct charset *charset;
6845
6846   for (tail = CODING_ATTR_CHARSET_LIST (attrs);
6847        CONSP (tail); tail = XCDR (tail))
6848     {
6849       charset = CHARSET_FROM_ID (XINT (XCAR (tail)));
6850       if (CHAR_CHARSET_P (c, charset))
6851         break;
6852     }
6853   return (! NILP (tail));
6854 }
6855
6856
6857 /* Return a list of coding systems that safely encode the text between
6858    START and END.  If EXCLUDE is non-nil, it is a list of coding
6859    systems not to check.  The returned list doesn't contain any such
6860    coding systems.  In any case, if the text contains only ASCII or is
6861    unibyte, return t.  */
6862
6863 DEFUN ("find-coding-systems-region-internal",
6864        Ffind_coding_systems_region_internal,
6865        Sfind_coding_systems_region_internal, 2, 3, 0,
6866        doc: /* Internal use only.  */)
6867      (start, end, exclude)
6868      Lisp_Object start, end, exclude;
6869 {
6870   Lisp_Object coding_attrs_list, safe_codings;
6871   EMACS_INT start_byte, end_byte;
6872   const unsigned char *p, *pbeg, *pend;
6873   int c;
6874   Lisp_Object tail, elt;
6875
6876   if (STRINGP (start))
6877     {
6878       if (!STRING_MULTIBYTE (start)
6879           || XSTRING (start)->size == STRING_BYTES (XSTRING (start)))
6880         return Qt;
6881       start_byte = 0;
6882       end_byte = STRING_BYTES (XSTRING (start));
6883     }
6884   else
6885     {
6886       CHECK_NUMBER_COERCE_MARKER (start);
6887       CHECK_NUMBER_COERCE_MARKER (end);
6888       if (XINT (start) < BEG || XINT (end) > Z || XINT (start) > XINT (end))
6889         args_out_of_range (start, end);
6890       if (NILP (current_buffer->enable_multibyte_characters))
6891         return Qt;
6892       start_byte = CHAR_TO_BYTE (XINT (start));
6893       end_byte = CHAR_TO_BYTE (XINT (end));
6894       if (XINT (end) - XINT (start) == end_byte - start_byte)
6895         return Qt;
6896
6897       if (XINT (start) < GPT && XINT (end) > GPT)
6898         {
6899           if ((GPT - XINT (start)) < (XINT (end) - GPT))
6900             move_gap_both (XINT (start), start_byte);
6901           else
6902             move_gap_both (XINT (end), end_byte);
6903         }
6904     }
6905
6906   coding_attrs_list = Qnil;
6907   for (tail = Vcoding_system_list; CONSP (tail); tail = XCDR (tail))
6908     if (NILP (exclude)
6909         || NILP (Fmemq (XCAR (tail), exclude)))
6910       {
6911         Lisp_Object attrs;
6912
6913         attrs = AREF (CODING_SYSTEM_SPEC (XCAR (tail)), 0);
6914         if (EQ (XCAR (tail), CODING_ATTR_BASE_NAME (attrs))
6915             && ! EQ (CODING_ATTR_TYPE (attrs), Qundecided))
6916           coding_attrs_list = Fcons (attrs, coding_attrs_list);
6917       }
6918
6919   if (STRINGP (start))
6920     p = pbeg = XSTRING (start)->data;
6921   else
6922     p = pbeg = BYTE_POS_ADDR (start_byte);
6923   pend = p + (end_byte - start_byte);
6924
6925   while (p < pend && ASCII_BYTE_P (*p)) p++;
6926   while (p < pend && ASCII_BYTE_P (*(pend - 1))) pend--;
6927
6928   while (p < pend)
6929     {
6930       if (ASCII_BYTE_P (*p))
6931         p++;
6932       else
6933         {
6934           c = STRING_CHAR_ADVANCE (p);
6935
6936           charset_map_loaded = 0;
6937           for (tail = coding_attrs_list; CONSP (tail);)
6938             {
6939               elt = XCAR (tail);
6940               if (NILP (elt))
6941                 tail = XCDR (tail);
6942               else if (char_encodable_p (c, elt))
6943                 tail = XCDR (tail);
6944               else if (CONSP (XCDR (tail)))
6945                 {
6946                   XSETCAR (tail, XCAR (XCDR (tail)));
6947                   XSETCDR (tail, XCDR (XCDR (tail)));
6948                 }
6949               else
6950                 {
6951                   XSETCAR (tail, Qnil);
6952                   tail = XCDR (tail);
6953                 }
6954             }
6955           if (charset_map_loaded)
6956             {
6957               EMACS_INT p_offset = p - pbeg, pend_offset = pend - pbeg;
6958
6959               if (STRINGP (start))
6960                 pbeg = XSTRING (start)->data;
6961               else
6962                 pbeg = BYTE_POS_ADDR (start_byte);
6963               p = pbeg + p_offset;
6964               pend = pbeg + pend_offset;
6965             }
6966         }
6967     }
6968
6969   safe_codings = Qnil;
6970   for (tail = coding_attrs_list; CONSP (tail); tail = XCDR (tail))
6971     if (! NILP (XCAR (tail)))
6972       safe_codings = Fcons (CODING_ATTR_BASE_NAME (XCAR (tail)), safe_codings);
6973
6974   return safe_codings;
6975 }
6976
6977
6978 DEFUN ("check-coding-systems-region", Fcheck_coding_systems_region,
6979        Scheck_coding_systems_region, 3, 3, 0,
6980        doc: /* Check if the region is encodable by coding systems.
6981
6982 START and END are buffer positions specifying the region.
6983 CODING-SYSTEM-LIST is a list of coding systems to check.
6984
6985 The value is an alist ((CODING-SYSTEM POS0 POS1 ...) ...), where
6986 CODING-SYSTEM is a member of CODING-SYSTEM-LIst and can't encode the
6987 whole region, POS0, POS1, ... are buffer positions where non-encodable
6988 characters are found.
6989
6990 If all coding systems in CODING-SYSTEM-LIST can encode the region, the
6991 value is nil.
6992
6993 START may be a string.  In that case, check if the string is
6994 encodable, and the value contains indices to the string instead of
6995 buffer positions.  END is ignored.  */)
6996      (start, end, coding_system_list)
6997      Lisp_Object start, end, coding_system_list;
6998 {
6999   Lisp_Object list;
7000   EMACS_INT start_byte, end_byte;
7001   int pos;
7002   const unsigned char *p, *pbeg, *pend;
7003   int c;
7004   Lisp_Object tail, elt;
7005
7006   if (STRINGP (start))
7007     {
7008       if (!STRING_MULTIBYTE (start)
7009           && XSTRING (start)->size != STRING_BYTES (XSTRING (start)))
7010         return Qnil;
7011       start_byte = 0;
7012       end_byte = STRING_BYTES (XSTRING (start));
7013       pos = 0;
7014     }
7015   else
7016     {
7017       CHECK_NUMBER_COERCE_MARKER (start);
7018       CHECK_NUMBER_COERCE_MARKER (end);
7019       if (XINT (start) < BEG || XINT (end) > Z || XINT (start) > XINT (end))
7020         args_out_of_range (start, end);
7021       if (NILP (current_buffer->enable_multibyte_characters))
7022         return Qnil;
7023       start_byte = CHAR_TO_BYTE (XINT (start));
7024       end_byte = CHAR_TO_BYTE (XINT (end));
7025       if (XINT (end) - XINT (start) == end_byte - start_byte)
7026         return Qt;
7027
7028       if (XINT (start) < GPT && XINT (end) > GPT)
7029         {
7030           if ((GPT - XINT (start)) < (XINT (end) - GPT))
7031             move_gap_both (XINT (start), start_byte);
7032           else
7033             move_gap_both (XINT (end), end_byte);
7034         }
7035       pos = XINT (start);
7036     }
7037
7038   list = Qnil;
7039   for (tail = coding_system_list; CONSP (tail); tail = XCDR (tail))
7040     {
7041       elt = XCAR (tail);
7042       list = Fcons (Fcons (elt, Fcons (AREF (CODING_SYSTEM_SPEC (elt), 0),
7043                                        Qnil)),
7044                     list);
7045     }
7046
7047   if (STRINGP (start))
7048     p = pbeg = XSTRING (start)->data;
7049   else
7050     p = pbeg = BYTE_POS_ADDR (start_byte);
7051   pend = p + (end_byte - start_byte);
7052
7053   while (p < pend && ASCII_BYTE_P (*p)) p++, pos++;
7054   while (p < pend && ASCII_BYTE_P (*(pend - 1))) pend--;
7055
7056   while (p < pend)
7057     {
7058       if (ASCII_BYTE_P (*p))
7059         p++;
7060       else
7061         {
7062           c = STRING_CHAR_ADVANCE (p);
7063
7064           charset_map_loaded = 0;
7065           for (tail = list; CONSP (tail); tail = XCDR (tail))
7066             {
7067               elt = XCDR (XCAR (tail));
7068               if (! char_encodable_p (c, XCAR (elt)))
7069                 XSETCDR (elt, Fcons (make_number (pos), XCDR (elt)));
7070             }
7071           if (charset_map_loaded)
7072             {
7073               EMACS_INT p_offset = p - pbeg, pend_offset = pend - pbeg;
7074
7075               if (STRINGP (start))
7076                 pbeg = XSTRING (start)->data;
7077               else
7078                 pbeg = BYTE_POS_ADDR (start_byte);
7079               p = pbeg + p_offset;
7080               pend = pbeg + pend_offset;
7081             }
7082         }
7083       pos++;
7084     }
7085
7086   tail = list;
7087   list = Qnil;
7088   for (; CONSP (tail); tail = XCDR (tail))
7089     {
7090       elt = XCAR (tail);
7091       if (CONSP (XCDR (XCDR (elt))))
7092         list = Fcons (Fcons (XCAR (elt), Fnreverse (XCDR (XCDR (elt)))),
7093                       list);
7094     }
7095
7096   return list;
7097 }
7098
7099
7100
7101 Lisp_Object
7102 code_convert_region (start, end, coding_system, dst_object, encodep, norecord)
7103      Lisp_Object start, end, coding_system, dst_object;
7104      int encodep, norecord;
7105 {
7106   struct coding_system coding;
7107   EMACS_INT from, from_byte, to, to_byte;
7108   Lisp_Object src_object;
7109
7110   CHECK_NUMBER_COERCE_MARKER (start);
7111   CHECK_NUMBER_COERCE_MARKER (end);
7112   if (NILP (coding_system))
7113     coding_system = Qno_conversion;
7114   else
7115     CHECK_CODING_SYSTEM (coding_system);
7116   src_object = Fcurrent_buffer ();
7117   if (NILP (dst_object))
7118     dst_object = src_object;
7119   else if (! EQ (dst_object, Qt))
7120     CHECK_BUFFER (dst_object);
7121
7122   validate_region (&start, &end);
7123   from = XFASTINT (start);
7124   from_byte = CHAR_TO_BYTE (from);
7125   to = XFASTINT (end);
7126   to_byte = CHAR_TO_BYTE (to);
7127
7128   setup_coding_system (coding_system, &coding);
7129   coding.mode |= CODING_MODE_LAST_BLOCK;
7130
7131   if (encodep)
7132     encode_coding_object (&coding, src_object, from, from_byte, to, to_byte,
7133                           dst_object);
7134   else
7135     decode_coding_object (&coding, src_object, from, from_byte, to, to_byte,
7136                           dst_object);
7137   if (! norecord)
7138     Vlast_coding_system_used = CODING_ID_NAME (coding.id);
7139
7140   if (coding.result != CODING_RESULT_SUCCESS)
7141     error ("Code conversion error: %d", coding.result);
7142
7143   return (BUFFERP (dst_object)
7144           ? make_number (coding.produced_char)
7145           : coding.dst_object);
7146 }
7147
7148
7149 DEFUN ("decode-coding-region", Fdecode_coding_region, Sdecode_coding_region,
7150        3, 4, "r\nzCoding system: ",
7151        doc: /* Decode the current region from the specified coding system.
7152 When called from a program, takes four arguments:
7153         START, END, CODING-SYSTEM, and DESTINATION.
7154 START and END are buffer positions.
7155
7156 Optional 4th arguments DESTINATION specifies where the decoded text goes.
7157 If nil, the region between START and END is replace by the decoded text.
7158 If buffer, the decoded text is inserted in the buffer.
7159 If t, the decoded text is returned.
7160
7161 This function sets `last-coding-system-used' to the precise coding system
7162 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
7163 not fully specified.)
7164 It returns the length of the decoded text.  */)
7165      (start, end, coding_system, destination)
7166      Lisp_Object start, end, coding_system, destination;
7167 {
7168   return code_convert_region (start, end, coding_system, destination, 0, 0);
7169 }
7170
7171 DEFUN ("encode-coding-region", Fencode_coding_region, Sencode_coding_region,
7172        3, 4, "r\nzCoding system: ",
7173        doc: /* Encode the current region by specified coding system.
7174 When called from a program, takes three arguments:
7175 START, END, and CODING-SYSTEM.  START and END are buffer positions.
7176
7177 Optional 4th arguments DESTINATION specifies where the encoded text goes.
7178 If nil, the region between START and END is replace by the encoded text.
7179 If buffer, the encoded text is inserted in the buffer.
7180 If t, the encoded text is returned.
7181
7182 This function sets `last-coding-system-used' to the precise coding system
7183 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
7184 not fully specified.)
7185 It returns the length of the encoded text.  */)
7186   (start, end, coding_system, destination)
7187      Lisp_Object start, end, coding_system, destination;
7188 {
7189   return code_convert_region (start, end, coding_system, destination, 1, 0);
7190 }
7191
7192 Lisp_Object
7193 code_convert_string (string, coding_system, dst_object,
7194                      encodep, nocopy, norecord)
7195      Lisp_Object string, coding_system, dst_object;
7196      int encodep, nocopy, norecord;
7197 {
7198   struct coding_system coding;
7199   EMACS_INT chars, bytes;
7200
7201   CHECK_STRING (string);
7202   if (NILP (coding_system))
7203     {
7204       if (! norecord)
7205         Vlast_coding_system_used = Qno_conversion;
7206       if (NILP (dst_object))
7207         return (nocopy ? Fcopy_sequence (string) : string);
7208     }
7209
7210   if (NILP (coding_system))
7211     coding_system = Qno_conversion;
7212   else
7213     CHECK_CODING_SYSTEM (coding_system);
7214   if (NILP (dst_object))
7215     dst_object = Qt;
7216   else if (! EQ (dst_object, Qt))
7217     CHECK_BUFFER (dst_object);
7218
7219   setup_coding_system (coding_system, &coding);
7220   coding.mode |= CODING_MODE_LAST_BLOCK;
7221   chars = XSTRING (string)->size;
7222   bytes = STRING_BYTES (XSTRING (string));
7223   if (encodep)
7224     encode_coding_object (&coding, string, 0, 0, chars, bytes, dst_object);
7225   else
7226     decode_coding_object (&coding, string, 0, 0, chars, bytes, dst_object);
7227   if (! norecord)
7228     Vlast_coding_system_used = CODING_ID_NAME (coding.id);
7229
7230   if (coding.result != CODING_RESULT_SUCCESS)
7231     error ("Code conversion error: %d", coding.result);
7232
7233   return (BUFFERP (dst_object)
7234           ? make_number (coding.produced_char)
7235           : coding.dst_object);
7236 }
7237
7238
7239 /* Encode or decode STRING according to CODING_SYSTEM.
7240    Do not set Vlast_coding_system_used.
7241
7242    This function is called only from macros DECODE_FILE and
7243    ENCODE_FILE, thus we ignore character composition.  */
7244
7245 Lisp_Object
7246 code_convert_string_norecord (string, coding_system, encodep)
7247      Lisp_Object string, coding_system;
7248      int encodep;
7249 {
7250   return code_convert_string (string, coding_system, Qt, encodep, 0, 1);
7251 }
7252
7253
7254 DEFUN ("decode-coding-string", Fdecode_coding_string, Sdecode_coding_string,
7255        2, 4, 0,
7256        doc: /* Decode STRING which is encoded in CODING-SYSTEM, and return the result.
7257
7258 Optional third arg NOCOPY non-nil means it is OK to return STRING itself
7259 if the decoding operation is trivial.
7260
7261 Optional fourth arg BUFFER non-nil meant that the decoded text is
7262 inserted in BUFFER instead of returned as a string.  In this case,
7263 the return value is BUFFER.
7264
7265 This function sets `last-coding-system-used' to the precise coding system
7266 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
7267 not fully specified.  */)
7268   (string, coding_system, nocopy, buffer)
7269      Lisp_Object string, coding_system, nocopy, buffer;
7270 {
7271   return code_convert_string (string, coding_system, buffer,
7272                               0, ! NILP (nocopy), 0);
7273 }
7274
7275 DEFUN ("encode-coding-string", Fencode_coding_string, Sencode_coding_string,
7276        2, 4, 0,
7277        doc: /* Encode STRING to CODING-SYSTEM, and return the result.
7278
7279 Optional third arg NOCOPY non-nil means it is OK to return STRING
7280 itself if the encoding operation is trivial.
7281
7282 Optional fourth arg BUFFER non-nil meant that the encoded text is
7283 inserted in BUFFER instead of returned as a string.  In this case,
7284 the return value is BUFFER.
7285
7286 This function sets `last-coding-system-used' to the precise coding system
7287 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
7288 not fully specified.)  */)
7289      (string, coding_system, nocopy, buffer)
7290      Lisp_Object string, coding_system, nocopy, buffer;
7291 {
7292   return code_convert_string (string, coding_system, buffer,
7293                               1, ! NILP (nocopy), 1);
7294 }
7295
7296 \f
7297 DEFUN ("decode-sjis-char", Fdecode_sjis_char, Sdecode_sjis_char, 1, 1, 0,
7298        doc: /* Decode a Japanese character which has CODE in shift_jis encoding.
7299 Return the corresponding character.  */)
7300      (code)
7301      Lisp_Object code;
7302 {
7303   Lisp_Object spec, attrs, val;
7304   struct charset *charset_roman, *charset_kanji, *charset_kana, *charset;
7305   int c;
7306
7307   CHECK_NATNUM (code);
7308   c = XFASTINT (code);
7309   CHECK_CODING_SYSTEM_GET_SPEC (Vsjis_coding_system, spec);
7310   attrs = AREF (spec, 0);
7311
7312   if (ASCII_BYTE_P (c)
7313       && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
7314     return code;
7315
7316   val = CODING_ATTR_CHARSET_LIST (attrs);
7317   charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
7318   charset_kana = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
7319   charset_kanji = CHARSET_FROM_ID (XINT (XCAR (val)));
7320
7321   if (c <= 0x7F)
7322     charset = charset_roman;
7323   else if (c >= 0xA0 && c < 0xDF)
7324     {
7325       charset = charset_kana;
7326       c -= 0x80;
7327     }
7328   else
7329     {
7330       int s1 = c >> 8, s2 = c & 0xFF;
7331
7332       if (s1 < 0x81 || (s1 > 0x9F && s1 < 0xE0) || s1 > 0xEF
7333           || s2 < 0x40 || s2 == 0x7F || s2 > 0xFC)
7334         error ("Invalid code: %d", code);
7335       SJIS_TO_JIS (c);
7336       charset = charset_kanji;
7337     }
7338   c = DECODE_CHAR (charset, c);
7339   if (c < 0)
7340     error ("Invalid code: %d", code);
7341   return make_number (c);
7342 }
7343
7344
7345 DEFUN ("encode-sjis-char", Fencode_sjis_char, Sencode_sjis_char, 1, 1, 0,
7346        doc: /* Encode a Japanese character CHAR to shift_jis encoding.
7347 Return the corresponding code in SJIS.  */)
7348      (ch)
7349     Lisp_Object ch;
7350 {
7351   Lisp_Object spec, attrs, charset_list;
7352   int c;
7353   struct charset *charset;
7354   unsigned code;
7355
7356   CHECK_CHARACTER (ch);
7357   c = XFASTINT (ch);
7358   CHECK_CODING_SYSTEM_GET_SPEC (Vsjis_coding_system, spec);
7359   attrs = AREF (spec, 0);
7360
7361   if (ASCII_CHAR_P (c)
7362       && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
7363     return ch;
7364
7365   charset_list = CODING_ATTR_CHARSET_LIST (attrs);
7366   charset = char_charset (c, charset_list, &code);
7367   if (code == CHARSET_INVALID_CODE (charset))
7368     error ("Can't encode by shift_jis encoding: %d", c);
7369   JIS_TO_SJIS (code);
7370
7371   return make_number (code);
7372 }
7373
7374 DEFUN ("decode-big5-char", Fdecode_big5_char, Sdecode_big5_char, 1, 1, 0,
7375        doc: /* Decode a Big5 character which has CODE in BIG5 coding system.
7376 Return the corresponding character.  */)
7377      (code)
7378      Lisp_Object code;
7379 {
7380   Lisp_Object spec, attrs, val;
7381   struct charset *charset_roman, *charset_big5, *charset;
7382   int c;
7383
7384   CHECK_NATNUM (code);
7385   c = XFASTINT (code);
7386   CHECK_CODING_SYSTEM_GET_SPEC (Vbig5_coding_system, spec);
7387   attrs = AREF (spec, 0);
7388
7389   if (ASCII_BYTE_P (c)
7390       && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
7391     return code;
7392
7393   val = CODING_ATTR_CHARSET_LIST (attrs);
7394   charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
7395   charset_big5 = CHARSET_FROM_ID (XINT (XCAR (val)));
7396
7397   if (c <= 0x7F)
7398     charset = charset_roman;
7399   else
7400     {
7401       int b1 = c >> 8, b2 = c & 0x7F;
7402       if (b1 < 0xA1 || b1 > 0xFE
7403           || b2 < 0x40 || (b2 > 0x7E && b2 < 0xA1) || b2 > 0xFE)
7404         error ("Invalid code: %d", code);
7405       charset = charset_big5;
7406     }
7407   c = DECODE_CHAR (charset, (unsigned )c);
7408   if (c < 0)
7409     error ("Invalid code: %d", code);
7410   return make_number (c);
7411 }
7412
7413 DEFUN ("encode-big5-char", Fencode_big5_char, Sencode_big5_char, 1, 1, 0,
7414        doc: /* Encode the Big5 character CHAR to BIG5 coding system.
7415 Return the corresponding character code in Big5.  */)
7416      (ch)
7417      Lisp_Object ch;
7418 {
7419   Lisp_Object spec, attrs, charset_list;
7420   struct charset *charset;
7421   int c;
7422   unsigned code;
7423
7424   CHECK_CHARACTER (ch);
7425   c = XFASTINT (ch);
7426   CHECK_CODING_SYSTEM_GET_SPEC (Vbig5_coding_system, spec);
7427   attrs = AREF (spec, 0);
7428   if (ASCII_CHAR_P (c)
7429       && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
7430     return ch;
7431
7432   charset_list = CODING_ATTR_CHARSET_LIST (attrs);
7433   charset = char_charset (c, charset_list, &code);
7434   if (code == CHARSET_INVALID_CODE (charset))
7435     error ("Can't encode by Big5 encoding: %d", c);
7436
7437   return make_number (code);
7438 }
7439
7440 \f
7441 DEFUN ("set-terminal-coding-system-internal",
7442        Fset_terminal_coding_system_internal,
7443        Sset_terminal_coding_system_internal, 1, 1, 0,
7444        doc: /* Internal use only.  */)
7445      (coding_system)
7446      Lisp_Object coding_system;
7447 {
7448   CHECK_SYMBOL (coding_system);
7449   setup_coding_system (Fcheck_coding_system (coding_system),
7450                         &terminal_coding);
7451
7452   /* We had better not send unsafe characters to terminal.  */
7453   terminal_coding.mode |= CODING_MODE_SAFE_ENCODING;
7454   /* Characer composition should be disabled.  */
7455   terminal_coding.common_flags &= ~CODING_ANNOTATE_COMPOSITION_MASK;
7456   terminal_coding.src_multibyte = 1;
7457   terminal_coding.dst_multibyte = 0;
7458   return Qnil;
7459 }
7460
7461 DEFUN ("set-safe-terminal-coding-system-internal",
7462        Fset_safe_terminal_coding_system_internal,
7463        Sset_safe_terminal_coding_system_internal, 1, 1, 0,
7464        doc: /* Internal use only.  */)
7465      (coding_system)
7466      Lisp_Object coding_system;
7467 {
7468   CHECK_SYMBOL (coding_system);
7469   setup_coding_system (Fcheck_coding_system (coding_system),
7470                        &safe_terminal_coding);
7471   /* Characer composition should be disabled.  */
7472   safe_terminal_coding.common_flags &= ~CODING_ANNOTATE_COMPOSITION_MASK;
7473   safe_terminal_coding.src_multibyte = 1;
7474   safe_terminal_coding.dst_multibyte = 0;
7475   return Qnil;
7476 }
7477
7478 DEFUN ("terminal-coding-system",
7479        Fterminal_coding_system, Sterminal_coding_system, 0, 0, 0,
7480        doc: /* Return coding system specified for terminal output.  */)
7481      ()
7482 {
7483   return CODING_ID_NAME (terminal_coding.id);
7484 }
7485
7486 DEFUN ("set-keyboard-coding-system-internal",
7487        Fset_keyboard_coding_system_internal,
7488        Sset_keyboard_coding_system_internal, 1, 1, 0,
7489        doc: /* Internal use only.  */)
7490      (coding_system)
7491      Lisp_Object coding_system;
7492 {
7493   CHECK_SYMBOL (coding_system);
7494   setup_coding_system (Fcheck_coding_system (coding_system),
7495                        &keyboard_coding);
7496   /* Characer composition should be disabled.  */
7497   keyboard_coding.common_flags &= ~CODING_ANNOTATE_COMPOSITION_MASK;
7498   return Qnil;
7499 }
7500
7501 DEFUN ("keyboard-coding-system",
7502        Fkeyboard_coding_system, Skeyboard_coding_system, 0, 0, 0,
7503        doc: /* Return coding system specified for decoding keyboard input.  */)
7504      ()
7505 {
7506   return CODING_ID_NAME (keyboard_coding.id);
7507 }
7508
7509 \f
7510 DEFUN ("find-operation-coding-system", Ffind_operation_coding_system,
7511        Sfind_operation_coding_system,  1, MANY, 0,
7512        doc: /* Choose a coding system for an operation based on the target name.
7513 The value names a pair of coding systems: (DECODING-SYSTEM . ENCODING-SYSTEM).
7514 DECODING-SYSTEM is the coding system to use for decoding
7515 \(in case OPERATION does decoding), and ENCODING-SYSTEM is the coding system
7516 for encoding (in case OPERATION does encoding).
7517
7518 The first argument OPERATION specifies an I/O primitive:
7519   For file I/O, `insert-file-contents' or `write-region'.
7520   For process I/O, `call-process', `call-process-region', or `start-process'.
7521   For network I/O, `open-network-stream'.
7522
7523 The remaining arguments should be the same arguments that were passed
7524 to the primitive.  Depending on which primitive, one of those arguments
7525 is selected as the TARGET.  For example, if OPERATION does file I/O,
7526 whichever argument specifies the file name is TARGET.
7527
7528 TARGET has a meaning which depends on OPERATION:
7529   For file I/O, TARGET is a file name.
7530   For process I/O, TARGET is a process name.
7531   For network I/O, TARGET is a service name or a port number
7532
7533 This function looks up what specified for TARGET in,
7534 `file-coding-system-alist', `process-coding-system-alist',
7535 or `network-coding-system-alist' depending on OPERATION.
7536 They may specify a coding system, a cons of coding systems,
7537 or a function symbol to call.
7538 In the last case, we call the function with one argument,
7539 which is a list of all the arguments given to this function.
7540
7541 usage: (find-operation-coding-system OPERATION ARGUMENTS ...)  */)
7542      (nargs, args)
7543      int nargs;
7544      Lisp_Object *args;
7545 {
7546   Lisp_Object operation, target_idx, target, val;
7547   register Lisp_Object chain;
7548
7549   if (nargs < 2)
7550     error ("Too few arguments");
7551   operation = args[0];
7552   if (!SYMBOLP (operation)
7553       || !INTEGERP (target_idx = Fget (operation, Qtarget_idx)))
7554     error ("Invalid first arguement");
7555   if (nargs < 1 + XINT (target_idx))
7556     error ("Too few arguments for operation: %s",
7557            XSYMBOL (operation)->name->data);
7558   target = args[XINT (target_idx) + 1];
7559   if (!(STRINGP (target)
7560         || (EQ (operation, Qopen_network_stream) && INTEGERP (target))))
7561     error ("Invalid %dth argument", XINT (target_idx) + 1);
7562
7563   chain = ((EQ (operation, Qinsert_file_contents)
7564             || EQ (operation, Qwrite_region))
7565            ? Vfile_coding_system_alist
7566            : (EQ (operation, Qopen_network_stream)
7567               ? Vnetwork_coding_system_alist
7568               : Vprocess_coding_system_alist));
7569   if (NILP (chain))
7570     return Qnil;
7571
7572   for (; CONSP (chain); chain = XCDR (chain))
7573     {
7574       Lisp_Object elt;
7575
7576       elt = XCAR (chain);
7577       if (CONSP (elt)
7578           && ((STRINGP (target)
7579                && STRINGP (XCAR (elt))
7580                && fast_string_match (XCAR (elt), target) >= 0)
7581               || (INTEGERP (target) && EQ (target, XCAR (elt)))))
7582         {
7583           val = XCDR (elt);
7584           /* Here, if VAL is both a valid coding system and a valid
7585              function symbol, we return VAL as a coding system.  */
7586           if (CONSP (val))
7587             return val;
7588           if (! SYMBOLP (val))
7589             return Qnil;
7590           if (! NILP (Fcoding_system_p (val)))
7591             return Fcons (val, val);
7592           if (! NILP (Ffboundp (val)))
7593             {
7594               val = call1 (val, Flist (nargs, args));
7595               if (CONSP (val))
7596                 return val;
7597               if (SYMBOLP (val) && ! NILP (Fcoding_system_p (val)))
7598                 return Fcons (val, val);
7599             }
7600           return Qnil;
7601         }
7602     }
7603   return Qnil;
7604 }
7605
7606 DEFUN ("set-coding-system-priority", Fset_coding_system_priority,
7607        Sset_coding_system_priority, 0, MANY, 0,
7608        doc: /* Assign higher priority to the coding systems given as arguments.
7609 usage: (set-coding-system-priority CODING-SYSTEM ...)  */)
7610      (nargs, args)
7611      int nargs;
7612      Lisp_Object *args;
7613 {
7614   int i, j;
7615   int changed[coding_category_max];
7616   enum coding_category priorities[coding_category_max];
7617
7618   bzero (changed, sizeof changed);
7619
7620   for (i = j = 0; i < nargs; i++)
7621     {
7622       enum coding_category category;
7623       Lisp_Object spec, attrs;
7624
7625       CHECK_CODING_SYSTEM_GET_SPEC (args[i], spec);
7626       attrs = AREF (spec, 0);
7627       category = XINT (CODING_ATTR_CATEGORY (attrs));
7628       if (changed[category])
7629         /* Ignore this coding system because a coding system of the
7630            same category already had a higher priority.  */
7631         continue;
7632       changed[category] = 1;
7633       priorities[j++] = category;
7634       if (coding_categories[category].id >= 0
7635           && ! EQ (args[i], CODING_ID_NAME (coding_categories[category].id)))
7636         setup_coding_system (args[i], &coding_categories[category]);
7637     }
7638
7639   /* Now we have decided top J priorities.  Reflect the order of the
7640      original priorities to the remaining priorities.  */
7641
7642   for (i = j, j = 0; i < coding_category_max; i++, j++)
7643     {
7644       while (j < coding_category_max
7645              && changed[coding_priorities[j]])
7646         j++;
7647       if (j == coding_category_max)
7648         abort ();
7649       priorities[i] = coding_priorities[j];
7650     }
7651
7652   bcopy (priorities, coding_priorities, sizeof priorities);
7653   return Qnil;
7654 }
7655
7656 DEFUN ("coding-system-priority-list", Fcoding_system_priority_list,
7657        Scoding_system_priority_list, 0, 1, 0,
7658        doc: /* Return a list of coding systems ordered by their priorities.
7659 HIGHESTP non-nil means just return the highest priority one.  */)
7660      (highestp)
7661      Lisp_Object highestp;
7662 {
7663   int i;
7664   Lisp_Object val;
7665
7666   for (i = 0, val = Qnil; i < coding_category_max; i++)
7667     {
7668       enum coding_category category = coding_priorities[i];
7669       int id = coding_categories[category].id;
7670       Lisp_Object attrs;
7671
7672       if (id < 0)
7673         continue;
7674       attrs = CODING_ID_ATTRS (id);
7675       if (! NILP (highestp))
7676         return CODING_ATTR_BASE_NAME (attrs);
7677       val = Fcons (CODING_ATTR_BASE_NAME (attrs), val);
7678     }
7679   return Fnreverse (val);
7680 }
7681
7682 static char *suffixes[] = { "-unix", "-dos", "-mac" };
7683
7684 static Lisp_Object
7685 make_subsidiaries (base)
7686      Lisp_Object base;
7687 {
7688   Lisp_Object subsidiaries;
7689   int base_name_len = STRING_BYTES (XSYMBOL (base)->name);
7690   char *buf = (char *) alloca (base_name_len + 6);
7691   int i;
7692
7693   bcopy (XSYMBOL (base)->name->data, buf, base_name_len);
7694   subsidiaries = Fmake_vector (make_number (3), Qnil);
7695   for (i = 0; i < 3; i++)
7696     {
7697       bcopy (suffixes[i], buf + base_name_len, strlen (suffixes[i]) + 1);
7698       ASET (subsidiaries, i, intern (buf));
7699     }
7700   return subsidiaries;
7701 }
7702
7703
7704 DEFUN ("define-coding-system-internal", Fdefine_coding_system_internal,
7705        Sdefine_coding_system_internal, coding_arg_max, MANY, 0,
7706        doc: /* For internal use only.
7707 usage: (define-coding-system-internal ...)  */)
7708      (nargs, args)
7709      int nargs;
7710      Lisp_Object *args;
7711 {
7712   Lisp_Object name;
7713   Lisp_Object spec_vec;         /* [ ATTRS ALIASE EOL_TYPE ] */
7714   Lisp_Object attrs;            /* Vector of attributes.  */
7715   Lisp_Object eol_type;
7716   Lisp_Object aliases;
7717   Lisp_Object coding_type, charset_list, safe_charsets;
7718   enum coding_category category;
7719   Lisp_Object tail, val;
7720   int max_charset_id = 0;
7721   int i;
7722
7723   if (nargs < coding_arg_max)
7724     goto short_args;
7725
7726   attrs = Fmake_vector (make_number (coding_attr_last_index), Qnil);
7727
7728   name = args[coding_arg_name];
7729   CHECK_SYMBOL (name);
7730   CODING_ATTR_BASE_NAME (attrs) = name;
7731
7732   val = args[coding_arg_mnemonic];
7733   if (! STRINGP (val))
7734     CHECK_CHARACTER (val);
7735   CODING_ATTR_MNEMONIC (attrs) = val;
7736
7737   coding_type = args[coding_arg_coding_type];
7738   CHECK_SYMBOL (coding_type);
7739   CODING_ATTR_TYPE (attrs) = coding_type;
7740
7741   charset_list = args[coding_arg_charset_list];
7742   if (SYMBOLP (charset_list))
7743     {
7744       if (EQ (charset_list, Qiso_2022))
7745         {
7746           if (! EQ (coding_type, Qiso_2022))
7747             error ("Invalid charset-list");
7748           charset_list = Viso_2022_charset_list;
7749         }
7750       else if (EQ (charset_list, Qemacs_mule))
7751         {
7752           if (! EQ (coding_type, Qemacs_mule))
7753             error ("Invalid charset-list");
7754           charset_list = Vemacs_mule_charset_list;
7755         }
7756       for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
7757         if (max_charset_id < XFASTINT (XCAR (tail)))
7758           max_charset_id = XFASTINT (XCAR (tail));
7759     }
7760   else
7761     {
7762       charset_list = Fcopy_sequence (charset_list);
7763       for (tail = charset_list; !NILP (tail); tail = Fcdr (tail))
7764         {
7765           struct charset *charset;
7766
7767           val = Fcar (tail);
7768           CHECK_CHARSET_GET_CHARSET (val, charset);
7769           if (EQ (coding_type, Qiso_2022)
7770               ? CHARSET_ISO_FINAL (charset) < 0
7771               : EQ (coding_type, Qemacs_mule)
7772               ? CHARSET_EMACS_MULE_ID (charset) < 0
7773               : 0)
7774             error ("Can't handle charset `%s'",
7775                    XSYMBOL (CHARSET_NAME (charset))->name->data);
7776
7777           XCAR (tail) = make_number (charset->id);
7778           if (max_charset_id < charset->id)
7779             max_charset_id = charset->id;
7780         }
7781     }
7782   CODING_ATTR_CHARSET_LIST (attrs) = charset_list;
7783
7784   safe_charsets = Fmake_string (make_number (max_charset_id + 1),
7785                                 make_number (255));
7786   for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
7787     XSTRING (safe_charsets)->data[XFASTINT (XCAR (tail))] = 0;
7788   CODING_ATTR_SAFE_CHARSETS (attrs) = safe_charsets;
7789
7790   CODING_ATTR_ASCII_COMPAT (attrs) = args[coding_arg_ascii_compatible_p];
7791
7792   val = args[coding_arg_decode_translation_table];
7793   if (! NILP (val))
7794     CHECK_CHAR_TABLE (val);
7795   CODING_ATTR_DECODE_TBL (attrs) = val;
7796
7797   val = args[coding_arg_encode_translation_table];
7798   if (! NILP (val))
7799     CHECK_CHAR_TABLE (val);
7800   CODING_ATTR_ENCODE_TBL (attrs) = val;
7801
7802   val = args[coding_arg_post_read_conversion];
7803   CHECK_SYMBOL (val);
7804   CODING_ATTR_POST_READ (attrs) = val;
7805
7806   val = args[coding_arg_pre_write_conversion];
7807   CHECK_SYMBOL (val);
7808   CODING_ATTR_PRE_WRITE (attrs) = val;
7809
7810   val = args[coding_arg_default_char];
7811   if (NILP (val))
7812     CODING_ATTR_DEFAULT_CHAR (attrs) = make_number (' ');
7813   else
7814     {
7815       CHECK_CHARACTER (val);
7816       CODING_ATTR_DEFAULT_CHAR (attrs) = val;
7817     }
7818
7819   val = args[coding_arg_plist];
7820   CHECK_LIST (val);
7821   CODING_ATTR_PLIST (attrs) = val;
7822
7823   if (EQ (coding_type, Qcharset))
7824     {
7825       /* Generate a lisp vector of 256 elements.  Each element is nil,
7826          integer, or a list of charset IDs.
7827
7828          If Nth element is nil, the byte code N is invalid in this
7829          coding system.
7830
7831          If Nth element is a number NUM, N is the first byte of a
7832          charset whose ID is NUM.
7833
7834          If Nth element is a list of charset IDs, N is the first byte
7835          of one of them.  The list is sorted by dimensions of the
7836          charsets.  A charset of smaller dimension comes firtst.
7837       */
7838       val = Fmake_vector (make_number (256), Qnil);
7839
7840       for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
7841         {
7842           struct charset *charset = CHARSET_FROM_ID (XFASTINT (XCAR (tail)));
7843           int dim = CHARSET_DIMENSION (charset);
7844           int idx = (dim - 1) * 4;
7845
7846           if (CHARSET_ASCII_COMPATIBLE_P (charset))
7847             CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
7848
7849           for (i = charset->code_space[idx];
7850                i <= charset->code_space[idx + 1]; i++)
7851             {
7852               Lisp_Object tmp, tmp2;
7853               int dim2;
7854
7855               tmp = AREF (val, i);
7856               if (NILP (tmp))
7857                 tmp = XCAR (tail);
7858               else if (NUMBERP (tmp))
7859                 {
7860                   dim2 = CHARSET_DIMENSION (CHARSET_FROM_ID (XFASTINT (tmp)));
7861                   if (dim < dim2)
7862                     tmp = Fcons (XCAR (tail), Fcons (tmp, Qnil));
7863                   else
7864                     tmp = Fcons (tmp, Fcons (XCAR (tail), Qnil));
7865                 }
7866               else
7867                 {
7868                   for (tmp2 = tmp; CONSP (tmp2); tmp2 = XCDR (tmp2))
7869                     {
7870                       dim2 = CHARSET_DIMENSION (CHARSET_FROM_ID (XFASTINT (XCAR (tmp2))));
7871                       if (dim < dim2)
7872                         break;
7873                     }
7874                   if (NILP (tmp2))
7875                     tmp = nconc2 (tmp, Fcons (XCAR (tail), Qnil));
7876                   else
7877                     {
7878                       XSETCDR (tmp2, Fcons (XCAR (tmp2), XCDR (tmp2)));
7879                       XSETCAR (tmp2, XCAR (tail));
7880                     }
7881                 }
7882               ASET (val, i, tmp);
7883             }
7884         }
7885       ASET (attrs, coding_attr_charset_valids, val);
7886       category = coding_category_charset;
7887     }
7888   else if (EQ (coding_type, Qccl))
7889     {
7890       Lisp_Object valids;
7891
7892       if (nargs < coding_arg_ccl_max)
7893         goto short_args;
7894
7895       val = args[coding_arg_ccl_decoder];
7896       CHECK_CCL_PROGRAM (val);
7897       if (VECTORP (val))
7898         val = Fcopy_sequence (val);
7899       ASET (attrs, coding_attr_ccl_decoder, val);
7900
7901       val = args[coding_arg_ccl_encoder];
7902       CHECK_CCL_PROGRAM (val);
7903       if (VECTORP (val))
7904         val = Fcopy_sequence (val);
7905       ASET (attrs, coding_attr_ccl_encoder, val);
7906
7907       val = args[coding_arg_ccl_valids];
7908       valids = Fmake_string (make_number (256), make_number (0));
7909       for (tail = val; !NILP (tail); tail = Fcdr (tail))
7910         {
7911           int from, to;
7912
7913           val = Fcar (tail);
7914           if (INTEGERP (val))
7915             {
7916               from = to = XINT (val);
7917               if (from < 0 || from > 255)
7918                 args_out_of_range_3 (val, make_number (0), make_number (255));
7919             }
7920           else
7921             {
7922               CHECK_CONS (val);
7923               CHECK_NUMBER (XCAR (val));
7924               CHECK_NUMBER (XCDR (val));
7925               from = XINT (XCAR (val));
7926               if (from < 0 || from > 255)
7927                 args_out_of_range_3 (XCAR (val),
7928                                      make_number (0), make_number (255));
7929               to = XINT (XCDR (val));
7930               if (to < from || to > 255)
7931                 args_out_of_range_3 (XCDR (val),
7932                                      XCAR (val), make_number (255));
7933             }
7934           for (i = from; i <= to; i++)
7935             XSTRING (valids)->data[i] = 1;
7936         }
7937       ASET (attrs, coding_attr_ccl_valids, valids);
7938
7939       category = coding_category_ccl;
7940     }
7941   else if (EQ (coding_type, Qutf_16))
7942     {
7943       Lisp_Object bom, endian;
7944
7945       CODING_ATTR_ASCII_COMPAT (attrs) = Qnil;
7946
7947       if (nargs < coding_arg_utf16_max)
7948         goto short_args;
7949
7950       bom = args[coding_arg_utf16_bom];
7951       if (! NILP (bom) && ! EQ (bom, Qt))
7952         {
7953           CHECK_CONS (bom);
7954           CHECK_CODING_SYSTEM (XCAR (bom));
7955           CHECK_CODING_SYSTEM (XCDR (bom));
7956         }
7957       ASET (attrs, coding_attr_utf_16_bom, bom);
7958
7959       endian = args[coding_arg_utf16_endian];
7960       ASET (attrs, coding_attr_utf_16_endian, endian);
7961
7962       category = (CONSP (bom)
7963                   ? coding_category_utf_16_auto
7964                   : NILP (bom)
7965                   ? (NILP (endian)
7966                      ? coding_category_utf_16_be_nosig
7967                      : coding_category_utf_16_le_nosig)
7968                   : (NILP (endian)
7969                      ? coding_category_utf_16_be
7970                      : coding_category_utf_16_le));
7971     }
7972   else if (EQ (coding_type, Qiso_2022))
7973     {
7974       Lisp_Object initial, reg_usage, request, flags;
7975       int i, id;
7976
7977       if (nargs < coding_arg_iso2022_max)
7978         goto short_args;
7979
7980       initial = Fcopy_sequence (args[coding_arg_iso2022_initial]);
7981       CHECK_VECTOR (initial);
7982       for (i = 0; i < 4; i++)
7983         {
7984           val = Faref (initial, make_number (i));
7985           if (! NILP (val))
7986             {
7987               struct charset *charset;
7988
7989               CHECK_CHARSET_GET_CHARSET (val, charset);
7990               ASET (initial, i, make_number (CHARSET_ID (charset)));
7991               if (i == 0 && CHARSET_ASCII_COMPATIBLE_P (charset))
7992                 CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
7993             }
7994           else
7995             ASET (initial, i, make_number (-1));
7996         }
7997
7998       reg_usage = args[coding_arg_iso2022_reg_usage];
7999       CHECK_CONS (reg_usage);
8000       CHECK_NATNUM (XCAR (reg_usage));
8001       CHECK_NATNUM (XCDR (reg_usage));
8002
8003       request = Fcopy_sequence (args[coding_arg_iso2022_request]);
8004       for (tail = request; ! NILP (tail); tail = Fcdr (tail))
8005         {
8006           int id;
8007
8008           val = Fcar (tail);
8009           CHECK_CONS (val);
8010           CHECK_CHARSET_GET_ID (XCAR (val), id);
8011           CHECK_NATNUM (XCDR (val));
8012           if (XINT (XCDR (val)) >= 4)
8013             error ("Invalid graphic register number: %d", XINT (XCDR (val)));
8014           XCAR (val) = make_number (id);
8015         }
8016
8017       flags = args[coding_arg_iso2022_flags];
8018       CHECK_NATNUM (flags);
8019       i = XINT (flags);
8020       if (EQ (args[coding_arg_charset_list], Qiso_2022))
8021         flags = make_number (i | CODING_ISO_FLAG_FULL_SUPPORT);
8022
8023       ASET (attrs, coding_attr_iso_initial, initial);
8024       ASET (attrs, coding_attr_iso_usage, reg_usage);
8025       ASET (attrs, coding_attr_iso_request, request);
8026       ASET (attrs, coding_attr_iso_flags, flags);
8027       setup_iso_safe_charsets (attrs);
8028
8029       if (i & CODING_ISO_FLAG_SEVEN_BITS)
8030         category = ((i & (CODING_ISO_FLAG_LOCKING_SHIFT
8031                           | CODING_ISO_FLAG_SINGLE_SHIFT))
8032                     ? coding_category_iso_7_else
8033                     : EQ (args[coding_arg_charset_list], Qiso_2022)
8034                     ? coding_category_iso_7
8035                     : coding_category_iso_7_tight);
8036       else
8037         {
8038           int id = XINT (AREF (initial, 1));
8039
8040           category = (((i & CODING_ISO_FLAG_LOCKING_SHIFT)
8041                        || EQ (args[coding_arg_charset_list], Qiso_2022)
8042                        || id < 0)
8043                       ? coding_category_iso_8_else
8044                       : (CHARSET_DIMENSION (CHARSET_FROM_ID (id)) == 1)
8045                       ? coding_category_iso_8_1
8046                       : coding_category_iso_8_2);
8047         }
8048       if (category != coding_category_iso_8_1
8049           && category != coding_category_iso_8_2)
8050         CODING_ATTR_ASCII_COMPAT (attrs) = Qnil;
8051     }
8052   else if (EQ (coding_type, Qemacs_mule))
8053     {
8054       if (EQ (args[coding_arg_charset_list], Qemacs_mule))
8055         ASET (attrs, coding_attr_emacs_mule_full, Qt);
8056       CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
8057       category = coding_category_emacs_mule;
8058     }
8059   else if (EQ (coding_type, Qshift_jis))
8060     {
8061
8062       struct charset *charset;
8063
8064       if (XINT (Flength (charset_list)) != 3)
8065         error ("There should be just three charsets");
8066
8067       charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
8068       if (CHARSET_DIMENSION (charset) != 1)
8069         error ("Dimension of charset %s is not one",
8070                XSYMBOL (CHARSET_NAME (charset))->name->data);
8071       if (CHARSET_ASCII_COMPATIBLE_P (charset))
8072         CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
8073
8074       charset_list = XCDR (charset_list);
8075       charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
8076       if (CHARSET_DIMENSION (charset) != 1)
8077         error ("Dimension of charset %s is not one",
8078                XSYMBOL (CHARSET_NAME (charset))->name->data);
8079
8080       charset_list = XCDR (charset_list);
8081       charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
8082       if (CHARSET_DIMENSION (charset) != 2)
8083         error ("Dimension of charset %s is not two",
8084                XSYMBOL (CHARSET_NAME (charset))->name->data);
8085
8086       category = coding_category_sjis;
8087       Vsjis_coding_system = name;
8088     }
8089   else if (EQ (coding_type, Qbig5))
8090     {
8091       struct charset *charset;
8092
8093       if (XINT (Flength (charset_list)) != 2)
8094         error ("There should be just two charsets");
8095
8096       charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
8097       if (CHARSET_DIMENSION (charset) != 1)
8098         error ("Dimension of charset %s is not one",
8099                XSYMBOL (CHARSET_NAME (charset))->name->data);
8100       if (CHARSET_ASCII_COMPATIBLE_P (charset))
8101         CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
8102
8103       charset_list = XCDR (charset_list);
8104       charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
8105       if (CHARSET_DIMENSION (charset) != 2)
8106         error ("Dimension of charset %s is not two",
8107                XSYMBOL (CHARSET_NAME (charset))->name->data);
8108
8109       category = coding_category_big5;
8110       Vbig5_coding_system = name;
8111     }
8112   else if (EQ (coding_type, Qraw_text))
8113     {
8114       category = coding_category_raw_text;
8115       CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
8116     }
8117   else if (EQ (coding_type, Qutf_8))
8118     {
8119       category = coding_category_utf_8;
8120       CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
8121     }
8122   else if (EQ (coding_type, Qundecided))
8123     category = coding_category_undecided;
8124   else
8125     error ("Invalid coding system type: %s",
8126            XSYMBOL (coding_type)->name->data);
8127
8128   CODING_ATTR_CATEGORY (attrs) = make_number (category);
8129
8130   eol_type = args[coding_arg_eol_type];
8131   if (! NILP (eol_type)
8132       && ! EQ (eol_type, Qunix)
8133       && ! EQ (eol_type, Qdos)
8134       && ! EQ (eol_type, Qmac))
8135     error ("Invalid eol-type");
8136
8137   aliases = Fcons (name, Qnil);
8138
8139   if (NILP (eol_type))
8140     {
8141       eol_type = make_subsidiaries (name);
8142       for (i = 0; i < 3; i++)
8143         {
8144           Lisp_Object this_spec, this_name, this_aliases, this_eol_type;
8145
8146           this_name = AREF (eol_type, i);
8147           this_aliases = Fcons (this_name, Qnil);
8148           this_eol_type = (i == 0 ? Qunix : i == 1 ? Qdos : Qmac);
8149           this_spec = Fmake_vector (make_number (3), attrs);
8150           ASET (this_spec, 1, this_aliases);
8151           ASET (this_spec, 2, this_eol_type);
8152           Fputhash (this_name, this_spec, Vcoding_system_hash_table);
8153           Vcoding_system_list = Fcons (this_name, Vcoding_system_list);
8154           Vcoding_system_alist = Fcons (Fcons (Fsymbol_name (this_name), Qnil),
8155                                         Vcoding_system_alist);
8156         }
8157     }
8158
8159   spec_vec = Fmake_vector (make_number (3), attrs);
8160   ASET (spec_vec, 1, aliases);
8161   ASET (spec_vec, 2, eol_type);
8162
8163   Fputhash (name, spec_vec, Vcoding_system_hash_table);
8164   Vcoding_system_list = Fcons (name, Vcoding_system_list);
8165   Vcoding_system_alist = Fcons (Fcons (Fsymbol_name (name), Qnil),
8166                                 Vcoding_system_alist);
8167
8168   {
8169     int id = coding_categories[category].id;
8170
8171     if (id < 0 || EQ (name, CODING_ID_NAME (id)))
8172       setup_coding_system (name, &coding_categories[category]);
8173   }
8174
8175   return Qnil;
8176
8177  short_args:
8178   return Fsignal (Qwrong_number_of_arguments,
8179                   Fcons (intern ("define-coding-system-internal"),
8180                          make_number (nargs)));
8181 }
8182
8183 /* Fixme: should this record the alias relationships for
8184    diagnostics?  Should it update coding-system-list?  */
8185 DEFUN ("define-coding-system-alias", Fdefine_coding_system_alias,
8186        Sdefine_coding_system_alias, 2, 2, 0,
8187        doc: /* Define ALIAS as an alias for CODING-SYSTEM.  */)
8188      (alias, coding_system)
8189      Lisp_Object alias, coding_system;
8190 {
8191   Lisp_Object spec, aliases, eol_type;
8192
8193   CHECK_SYMBOL (alias);
8194   CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
8195   aliases = AREF (spec, 1);
8196   while (!NILP (XCDR (aliases)))
8197     aliases = XCDR (aliases);
8198   XCDR (aliases) = Fcons (alias, Qnil);
8199
8200   eol_type = AREF (spec, 2);
8201   if (VECTORP (eol_type))
8202     {
8203       Lisp_Object subsidiaries;
8204       int i;
8205
8206       subsidiaries = make_subsidiaries (alias);
8207       for (i = 0; i < 3; i++)
8208         Fdefine_coding_system_alias (AREF (subsidiaries, i),
8209                                      AREF (eol_type, i));
8210
8211       ASET (spec, 2, subsidiaries);
8212     }
8213
8214   Fputhash (alias, spec, Vcoding_system_hash_table);
8215   Vcoding_system_alist = Fcons (Fcons (Fsymbol_name (alias), Qnil),
8216                                 Vcoding_system_alist);
8217
8218   return Qnil;
8219 }
8220
8221 DEFUN ("coding-system-base", Fcoding_system_base, Scoding_system_base,
8222        1, 1, 0,
8223        doc: /* Return the base of CODING-SYSTEM.
8224 Any alias or subsidiary coding system is not a base coding system.  */)
8225   (coding_system)
8226      Lisp_Object coding_system;
8227 {
8228   Lisp_Object spec, attrs;
8229
8230   if (NILP (coding_system))
8231     return (Qno_conversion);
8232   CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
8233   attrs = AREF (spec, 0);
8234   return CODING_ATTR_BASE_NAME (attrs);
8235 }
8236
8237 DEFUN ("coding-system-plist", Fcoding_system_plist, Scoding_system_plist,
8238        1, 1, 0,
8239        doc: "Return the property list of CODING-SYSTEM.")
8240      (coding_system)
8241      Lisp_Object coding_system;
8242 {
8243   Lisp_Object spec, attrs;
8244
8245   if (NILP (coding_system))
8246     coding_system = Qno_conversion;
8247   CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
8248   attrs = AREF (spec, 0);
8249   return CODING_ATTR_PLIST (attrs);
8250 }
8251
8252
8253 DEFUN ("coding-system-aliases", Fcoding_system_aliases, Scoding_system_aliases,
8254        1, 1, 0,
8255        doc: /* Return the list of aliases of CODING-SYSTEM.  */)
8256      (coding_system)
8257      Lisp_Object coding_system;
8258 {
8259   Lisp_Object spec;
8260
8261   if (NILP (coding_system))
8262     coding_system = Qno_conversion;
8263   CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
8264   return AREF (spec, 1);
8265 }
8266
8267 DEFUN ("coding-system-eol-type", Fcoding_system_eol_type,
8268        Scoding_system_eol_type, 1, 1, 0,
8269        doc: /* Return eol-type of CODING-SYSTEM.
8270 An eol-type is integer 0, 1, 2, or a vector of coding systems.
8271
8272 Integer values 0, 1, and 2 indicate a format of end-of-line; LF, CRLF,
8273 and CR respectively.
8274
8275 A vector value indicates that a format of end-of-line should be
8276 detected automatically.  Nth element of the vector is the subsidiary
8277 coding system whose eol-type is N.  */)
8278      (coding_system)
8279      Lisp_Object coding_system;
8280 {
8281   Lisp_Object spec, eol_type;
8282   int n;
8283
8284   if (NILP (coding_system))
8285     coding_system = Qno_conversion;
8286   if (! CODING_SYSTEM_P (coding_system))
8287     return Qnil;
8288   spec = CODING_SYSTEM_SPEC (coding_system);
8289   eol_type = AREF (spec, 2);
8290   if (VECTORP (eol_type))
8291     return Fcopy_sequence (eol_type);
8292   n = EQ (eol_type, Qunix) ? 0 : EQ (eol_type, Qdos) ? 1 : 2;
8293   return make_number (n);
8294 }
8295
8296 #endif /* emacs */
8297
8298 \f
8299 /*** 9. Post-amble ***/
8300
8301 void
8302 init_coding_once ()
8303 {
8304   int i;
8305
8306   for (i = 0; i < coding_category_max; i++)
8307     {
8308       coding_categories[i].id = -1;
8309       coding_priorities[i] = i;
8310     }
8311
8312   /* ISO2022 specific initialize routine.  */
8313   for (i = 0; i < 0x20; i++)
8314     iso_code_class[i] = ISO_control_0;
8315   for (i = 0x21; i < 0x7F; i++)
8316     iso_code_class[i] = ISO_graphic_plane_0;
8317   for (i = 0x80; i < 0xA0; i++)
8318     iso_code_class[i] = ISO_control_1;
8319   for (i = 0xA1; i < 0xFF; i++)
8320     iso_code_class[i] = ISO_graphic_plane_1;
8321   iso_code_class[0x20] = iso_code_class[0x7F] = ISO_0x20_or_0x7F;
8322   iso_code_class[0xA0] = iso_code_class[0xFF] = ISO_0xA0_or_0xFF;
8323   iso_code_class[ISO_CODE_CR] = ISO_carriage_return;
8324   iso_code_class[ISO_CODE_SO] = ISO_shift_out;
8325   iso_code_class[ISO_CODE_SI] = ISO_shift_in;
8326   iso_code_class[ISO_CODE_SS2_7] = ISO_single_shift_2_7;
8327   iso_code_class[ISO_CODE_ESC] = ISO_escape;
8328   iso_code_class[ISO_CODE_SS2] = ISO_single_shift_2;
8329   iso_code_class[ISO_CODE_SS3] = ISO_single_shift_3;
8330   iso_code_class[ISO_CODE_CSI] = ISO_control_sequence_introducer;
8331
8332   inhibit_pre_post_conversion = 0;
8333
8334   for (i = 0; i < 256; i++)
8335     {
8336       emacs_mule_bytes[i] = 1;
8337     }
8338   emacs_mule_bytes[EMACS_MULE_LEADING_CODE_PRIVATE_11] = 3;
8339   emacs_mule_bytes[EMACS_MULE_LEADING_CODE_PRIVATE_12] = 3;
8340   emacs_mule_bytes[EMACS_MULE_LEADING_CODE_PRIVATE_21] = 4;
8341   emacs_mule_bytes[EMACS_MULE_LEADING_CODE_PRIVATE_22] = 4;
8342 }
8343
8344 #ifdef emacs
8345
8346 void
8347 syms_of_coding ()
8348 {
8349   staticpro (&Vcoding_system_hash_table);
8350   Vcoding_system_hash_table = Fmakehash (Qeq);
8351
8352   staticpro (&Vsjis_coding_system);
8353   Vsjis_coding_system = Qnil;
8354
8355   staticpro (&Vbig5_coding_system);
8356   Vbig5_coding_system = Qnil;
8357
8358   staticpro (&Vcode_conversion_work_buf_list);
8359   Vcode_conversion_work_buf_list = Qnil;
8360
8361   staticpro (&Vcode_conversion_reused_work_buf);
8362   Vcode_conversion_reused_work_buf = Qnil;
8363
8364   DEFSYM (Qcharset, "charset");
8365   DEFSYM (Qtarget_idx, "target-idx");
8366   DEFSYM (Qcoding_system_history, "coding-system-history");
8367   Fset (Qcoding_system_history, Qnil);
8368
8369   /* Target FILENAME is the first argument.  */
8370   Fput (Qinsert_file_contents, Qtarget_idx, make_number (0));
8371   /* Target FILENAME is the third argument.  */
8372   Fput (Qwrite_region, Qtarget_idx, make_number (2));
8373
8374   DEFSYM (Qcall_process, "call-process");
8375   /* Target PROGRAM is the first argument.  */
8376   Fput (Qcall_process, Qtarget_idx, make_number (0));
8377
8378   DEFSYM (Qcall_process_region, "call-process-region");
8379   /* Target PROGRAM is the third argument.  */
8380   Fput (Qcall_process_region, Qtarget_idx, make_number (2));
8381
8382   DEFSYM (Qstart_process, "start-process");
8383   /* Target PROGRAM is the third argument.  */
8384   Fput (Qstart_process, Qtarget_idx, make_number (2));
8385
8386   DEFSYM (Qopen_network_stream, "open-network-stream");
8387   /* Target SERVICE is the fourth argument.  */
8388   Fput (Qopen_network_stream, Qtarget_idx, make_number (3));
8389
8390   DEFSYM (Qcoding_system, "coding-system");
8391   DEFSYM (Qcoding_aliases, "coding-aliases");
8392
8393   DEFSYM (Qeol_type, "eol-type");
8394   DEFSYM (Qunix, "unix");
8395   DEFSYM (Qdos, "dos");
8396
8397   DEFSYM (Qbuffer_file_coding_system, "buffer-file-coding-system");
8398   DEFSYM (Qpost_read_conversion, "post-read-conversion");
8399   DEFSYM (Qpre_write_conversion, "pre-write-conversion");
8400   DEFSYM (Qdefault_char, "default-char");
8401   DEFSYM (Qundecided, "undecided");
8402   DEFSYM (Qno_conversion, "no-conversion");
8403   DEFSYM (Qraw_text, "raw-text");
8404
8405   DEFSYM (Qiso_2022, "iso-2022");
8406
8407   DEFSYM (Qutf_8, "utf-8");
8408
8409   DEFSYM (Qutf_16, "utf-16");
8410   DEFSYM (Qsignature, "signature");
8411   DEFSYM (Qendian, "endian");
8412   DEFSYM (Qbig, "big");
8413   DEFSYM (Qlittle, "little");
8414
8415   DEFSYM (Qshift_jis, "shift-jis");
8416   DEFSYM (Qbig5, "big5");
8417
8418   DEFSYM (Qcoding_system_p, "coding-system-p");
8419
8420   DEFSYM (Qcoding_system_error, "coding-system-error");
8421   Fput (Qcoding_system_error, Qerror_conditions,
8422         Fcons (Qcoding_system_error, Fcons (Qerror, Qnil)));
8423   Fput (Qcoding_system_error, Qerror_message,
8424         build_string ("Invalid coding system"));
8425
8426   /* Intern this now in case it isn't already done.
8427      Setting this variable twice is harmless.
8428      But don't staticpro it here--that is done in alloc.c.  */
8429   Qchar_table_extra_slots = intern ("char-table-extra-slots");
8430
8431   DEFSYM (Qtranslation_table, "translation-table");
8432   Fput (Qtranslation_table, Qchar_table_extra_slots, make_number (1));
8433   DEFSYM (Qtranslation_table_id, "translation-table-id");
8434   DEFSYM (Qtranslation_table_for_decode, "translation-table-for-decode");
8435   DEFSYM (Qtranslation_table_for_encode, "translation-table-for-encode");
8436
8437   DEFSYM (Qvalid_codes, "valid-codes");
8438
8439   DEFSYM (Qemacs_mule, "emacs-mule");
8440
8441   Vcoding_category_table
8442     = Fmake_vector (make_number (coding_category_max), Qnil);
8443   staticpro (&Vcoding_category_table);
8444   /* Followings are target of code detection.  */
8445   ASET (Vcoding_category_table, coding_category_iso_7,
8446         intern ("coding-category-iso-7"));
8447   ASET (Vcoding_category_table, coding_category_iso_7_tight,
8448         intern ("coding-category-iso-7-tight"));
8449   ASET (Vcoding_category_table, coding_category_iso_8_1,
8450         intern ("coding-category-iso-8-1"));
8451   ASET (Vcoding_category_table, coding_category_iso_8_2,
8452         intern ("coding-category-iso-8-2"));
8453   ASET (Vcoding_category_table, coding_category_iso_7_else,
8454         intern ("coding-category-iso-7-else"));
8455   ASET (Vcoding_category_table, coding_category_iso_8_else,
8456         intern ("coding-category-iso-8-else"));
8457   ASET (Vcoding_category_table, coding_category_utf_8,
8458         intern ("coding-category-utf-8"));
8459   ASET (Vcoding_category_table, coding_category_utf_16_be,
8460         intern ("coding-category-utf-16-be"));
8461   ASET (Vcoding_category_table, coding_category_utf_16_le,
8462         intern ("coding-category-utf-16-le"));
8463   ASET (Vcoding_category_table, coding_category_utf_16_be_nosig,
8464         intern ("coding-category-utf-16-be-nosig"));
8465   ASET (Vcoding_category_table, coding_category_utf_16_le_nosig,
8466         intern ("coding-category-utf-16-le-nosig"));
8467   ASET (Vcoding_category_table, coding_category_charset,
8468         intern ("coding-category-charset"));
8469   ASET (Vcoding_category_table, coding_category_sjis,
8470         intern ("coding-category-sjis"));
8471   ASET (Vcoding_category_table, coding_category_big5,
8472         intern ("coding-category-big5"));
8473   ASET (Vcoding_category_table, coding_category_ccl,
8474         intern ("coding-category-ccl"));
8475   ASET (Vcoding_category_table, coding_category_emacs_mule,
8476         intern ("coding-category-emacs-mule"));
8477   /* Followings are NOT target of code detection.  */
8478   ASET (Vcoding_category_table, coding_category_raw_text,
8479         intern ("coding-category-raw-text"));
8480   ASET (Vcoding_category_table, coding_category_undecided,
8481         intern ("coding-category-undecided"));
8482
8483   defsubr (&Scoding_system_p);
8484   defsubr (&Sread_coding_system);
8485   defsubr (&Sread_non_nil_coding_system);
8486   defsubr (&Scheck_coding_system);
8487   defsubr (&Sdetect_coding_region);
8488   defsubr (&Sdetect_coding_string);
8489   defsubr (&Sfind_coding_systems_region_internal);
8490   defsubr (&Scheck_coding_systems_region);
8491   defsubr (&Sdecode_coding_region);
8492   defsubr (&Sencode_coding_region);
8493   defsubr (&Sdecode_coding_string);
8494   defsubr (&Sencode_coding_string);
8495   defsubr (&Sdecode_sjis_char);
8496   defsubr (&Sencode_sjis_char);
8497   defsubr (&Sdecode_big5_char);
8498   defsubr (&Sencode_big5_char);
8499   defsubr (&Sset_terminal_coding_system_internal);
8500   defsubr (&Sset_safe_terminal_coding_system_internal);
8501   defsubr (&Sterminal_coding_system);
8502   defsubr (&Sset_keyboard_coding_system_internal);
8503   defsubr (&Skeyboard_coding_system);
8504   defsubr (&Sfind_operation_coding_system);
8505   defsubr (&Sset_coding_system_priority);
8506   defsubr (&Sdefine_coding_system_internal);
8507   defsubr (&Sdefine_coding_system_alias);
8508   defsubr (&Scoding_system_base);
8509   defsubr (&Scoding_system_plist);
8510   defsubr (&Scoding_system_aliases);
8511   defsubr (&Scoding_system_eol_type);
8512   defsubr (&Scoding_system_priority_list);
8513
8514   DEFVAR_LISP ("coding-system-list", &Vcoding_system_list,
8515                doc: /* List of coding systems.
8516
8517 Do not alter the value of this variable manually.  This variable should be
8518 updated by the functions `define-coding-system' and
8519 `define-coding-system-alias'.  */);
8520   Vcoding_system_list = Qnil;
8521
8522   DEFVAR_LISP ("coding-system-alist", &Vcoding_system_alist,
8523                doc: /* Alist of coding system names.
8524 Each element is one element list of coding system name.
8525 This variable is given to `completing-read' as TABLE argument.
8526
8527 Do not alter the value of this variable manually.  This variable should be
8528 updated by the functions `make-coding-system' and
8529 `define-coding-system-alias'.  */);
8530   Vcoding_system_alist = Qnil;
8531
8532   DEFVAR_LISP ("coding-category-list", &Vcoding_category_list,
8533                doc: /* List of coding-categories (symbols) ordered by priority.
8534
8535 On detecting a coding system, Emacs tries code detection algorithms
8536 associated with each coding-category one by one in this order.  When
8537 one algorithm agrees with a byte sequence of source text, the coding
8538 system bound to the corresponding coding-category is selected.  */);
8539   {
8540     int i;
8541
8542     Vcoding_category_list = Qnil;
8543     for (i = coding_category_max - 1; i >= 0; i--)
8544       Vcoding_category_list
8545         = Fcons (XVECTOR (Vcoding_category_table)->contents[i],
8546                  Vcoding_category_list);
8547   }
8548
8549   DEFVAR_LISP ("coding-system-for-read", &Vcoding_system_for_read,
8550                doc: /* Specify the coding system for read operations.
8551 It is useful to bind this variable with `let', but do not set it globally.
8552 If the value is a coding system, it is used for decoding on read operation.
8553 If not, an appropriate element is used from one of the coding system alists:
8554 There are three such tables, `file-coding-system-alist',
8555 `process-coding-system-alist', and `network-coding-system-alist'.  */);
8556   Vcoding_system_for_read = Qnil;
8557
8558   DEFVAR_LISP ("coding-system-for-write", &Vcoding_system_for_write,
8559                doc: /* Specify the coding system for write operations.
8560 Programs bind this variable with `let', but you should not set it globally.
8561 If the value is a coding system, it is used for encoding of output,
8562 when writing it to a file and when sending it to a file or subprocess.
8563
8564 If this does not specify a coding system, an appropriate element
8565 is used from one of the coding system alists:
8566 There are three such tables, `file-coding-system-alist',
8567 `process-coding-system-alist', and `network-coding-system-alist'.
8568 For output to files, if the above procedure does not specify a coding system,
8569 the value of `buffer-file-coding-system' is used.  */);
8570   Vcoding_system_for_write = Qnil;
8571
8572   DEFVAR_LISP ("last-coding-system-used", &Vlast_coding_system_used,
8573                doc: /*
8574 Coding system used in the latest file or process I/O.  */);
8575   Vlast_coding_system_used = Qnil;
8576
8577   DEFVAR_BOOL ("inhibit-eol-conversion", &inhibit_eol_conversion,
8578                doc: /*
8579 *Non-nil means always inhibit code conversion of end-of-line format.
8580 See info node `Coding Systems' and info node `Text and Binary' concerning
8581 such conversion.  */);
8582   inhibit_eol_conversion = 0;
8583
8584   DEFVAR_BOOL ("inherit-process-coding-system", &inherit_process_coding_system,
8585                doc: /*
8586 Non-nil means process buffer inherits coding system of process output.
8587 Bind it to t if the process output is to be treated as if it were a file
8588 read from some filesystem.  */);
8589   inherit_process_coding_system = 0;
8590
8591   DEFVAR_LISP ("file-coding-system-alist", &Vfile_coding_system_alist,
8592                doc: /*
8593 Alist to decide a coding system to use for a file I/O operation.
8594 The format is ((PATTERN . VAL) ...),
8595 where PATTERN is a regular expression matching a file name,
8596 VAL is a coding system, a cons of coding systems, or a function symbol.
8597 If VAL is a coding system, it is used for both decoding and encoding
8598 the file contents.
8599 If VAL is a cons of coding systems, the car part is used for decoding,
8600 and the cdr part is used for encoding.
8601 If VAL is a function symbol, the function must return a coding system
8602 or a cons of coding systems which are used as above.  The function gets
8603 the arguments with which `find-operation-coding-systems' was called.
8604
8605 See also the function `find-operation-coding-system'
8606 and the variable `auto-coding-alist'.  */);
8607   Vfile_coding_system_alist = Qnil;
8608
8609   DEFVAR_LISP ("process-coding-system-alist", &Vprocess_coding_system_alist,
8610                doc: /*
8611 Alist to decide a coding system to use for a process I/O operation.
8612 The format is ((PATTERN . VAL) ...),
8613 where PATTERN is a regular expression matching a program name,
8614 VAL is a coding system, a cons of coding systems, or a function symbol.
8615 If VAL is a coding system, it is used for both decoding what received
8616 from the program and encoding what sent to the program.
8617 If VAL is a cons of coding systems, the car part is used for decoding,
8618 and the cdr part is used for encoding.
8619 If VAL is a function symbol, the function must return a coding system
8620 or a cons of coding systems which are used as above.
8621
8622 See also the function `find-operation-coding-system'.  */);
8623   Vprocess_coding_system_alist = Qnil;
8624
8625   DEFVAR_LISP ("network-coding-system-alist", &Vnetwork_coding_system_alist,
8626                doc: /*
8627 Alist to decide a coding system to use for a network I/O operation.
8628 The format is ((PATTERN . VAL) ...),
8629 where PATTERN is a regular expression matching a network service name
8630 or is a port number to connect to,
8631 VAL is a coding system, a cons of coding systems, or a function symbol.
8632 If VAL is a coding system, it is used for both decoding what received
8633 from the network stream and encoding what sent to the network stream.
8634 If VAL is a cons of coding systems, the car part is used for decoding,
8635 and the cdr part is used for encoding.
8636 If VAL is a function symbol, the function must return a coding system
8637 or a cons of coding systems which are used as above.
8638
8639 See also the function `find-operation-coding-system'.  */);
8640   Vnetwork_coding_system_alist = Qnil;
8641
8642   DEFVAR_LISP ("locale-coding-system", &Vlocale_coding_system,
8643                doc: /* Coding system to use with system messages.
8644 Also used for decoding keyboard input on X Window system.  */);
8645   Vlocale_coding_system = Qnil;
8646
8647   /* The eol mnemonics are reset in startup.el system-dependently.  */
8648   DEFVAR_LISP ("eol-mnemonic-unix", &eol_mnemonic_unix,
8649                doc: /*
8650 *String displayed in mode line for UNIX-like (LF) end-of-line format.  */);
8651   eol_mnemonic_unix = build_string (":");
8652
8653   DEFVAR_LISP ("eol-mnemonic-dos", &eol_mnemonic_dos,
8654                doc: /*
8655 *String displayed in mode line for DOS-like (CRLF) end-of-line format.  */);
8656   eol_mnemonic_dos = build_string ("\\");
8657
8658   DEFVAR_LISP ("eol-mnemonic-mac", &eol_mnemonic_mac,
8659                doc: /*
8660 *String displayed in mode line for MAC-like (CR) end-of-line format.  */);
8661   eol_mnemonic_mac = build_string ("/");
8662
8663   DEFVAR_LISP ("eol-mnemonic-undecided", &eol_mnemonic_undecided,
8664                doc: /*
8665 *String displayed in mode line when end-of-line format is not yet determined.  */);
8666   eol_mnemonic_undecided = build_string (":");
8667
8668   DEFVAR_LISP ("enable-character-translation", &Venable_character_translation,
8669                doc: /*
8670 *Non-nil enables character translation while encoding and decoding.  */);
8671   Venable_character_translation = Qt;
8672
8673   DEFVAR_LISP ("standard-translation-table-for-decode",
8674                &Vstandard_translation_table_for_decode,
8675                doc: /* Table for translating characters while decoding.  */);
8676   Vstandard_translation_table_for_decode = Qnil;
8677
8678   DEFVAR_LISP ("standard-translation-table-for-encode",
8679                &Vstandard_translation_table_for_encode,
8680                doc: /* Table for translating characters while encoding.  */);
8681   Vstandard_translation_table_for_encode = Qnil;
8682
8683   DEFVAR_LISP ("charset-revision-table", &Vcharset_revision_table,
8684                doc: /* Alist of charsets vs revision numbers.
8685 While encoding, if a charset (car part of an element) is found,
8686 designate it with the escape sequence identifying revision (cdr part
8687 of the element).  */);
8688   Vcharset_revision_table = Qnil;
8689
8690   DEFVAR_LISP ("default-process-coding-system",
8691                &Vdefault_process_coding_system,
8692                doc: /* Cons of coding systems used for process I/O by default.
8693 The car part is used for decoding a process output,
8694 the cdr part is used for encoding a text to be sent to a process.  */);
8695   Vdefault_process_coding_system = Qnil;
8696
8697   DEFVAR_LISP ("latin-extra-code-table", &Vlatin_extra_code_table,
8698                doc: /*
8699 Table of extra Latin codes in the range 128..159 (inclusive).
8700 This is a vector of length 256.
8701 If Nth element is non-nil, the existence of code N in a file
8702 \(or output of subprocess) doesn't prevent it to be detected as
8703 a coding system of ISO 2022 variant which has a flag
8704 `accept-latin-extra-code' t (e.g. iso-latin-1) on reading a file
8705 or reading output of a subprocess.
8706 Only 128th through 159th elements has a meaning.  */);
8707   Vlatin_extra_code_table = Fmake_vector (make_number (256), Qnil);
8708
8709   DEFVAR_LISP ("select-safe-coding-system-function",
8710                &Vselect_safe_coding_system_function,
8711                doc: /*
8712 Function to call to select safe coding system for encoding a text.
8713
8714 If set, this function is called to force a user to select a proper
8715 coding system which can encode the text in the case that a default
8716 coding system used in each operation can't encode the text.
8717
8718 The default value is `select-safe-coding-system' (which see).  */);
8719   Vselect_safe_coding_system_function = Qnil;
8720
8721   DEFVAR_BOOL ("inhibit-iso-escape-detection",
8722                &inhibit_iso_escape_detection,
8723                doc: /*
8724 If non-nil, Emacs ignores ISO2022's escape sequence on code detection.
8725
8726 By default, on reading a file, Emacs tries to detect how the text is
8727 encoded.  This code detection is sensitive to escape sequences.  If
8728 the sequence is valid as ISO2022, the code is determined as one of
8729 the ISO2022 encodings, and the file is decoded by the corresponding
8730 coding system (e.g. `iso-2022-7bit').
8731
8732 However, there may be a case that you want to read escape sequences in
8733 a file as is.  In such a case, you can set this variable to non-nil.
8734 Then, as the code detection ignores any escape sequences, no file is
8735 detected as encoded in some ISO2022 encoding.  The result is that all
8736 escape sequences become visible in a buffer.
8737
8738 The default value is nil, and it is strongly recommended not to change
8739 it.  That is because many Emacs Lisp source files that contain
8740 non-ASCII characters are encoded by the coding system `iso-2022-7bit'
8741 in Emacs's distribution, and they won't be decoded correctly on
8742 reading if you suppress escape sequence detection.
8743
8744 The other way to read escape sequences in a file without decoding is
8745 to explicitly specify some coding system that doesn't use ISO2022's
8746 escape sequence (e.g `latin-1') on reading by \\[universal-coding-system-argument].  */);
8747   inhibit_iso_escape_detection = 0;
8748
8749   {
8750     Lisp_Object args[coding_arg_max];
8751     Lisp_Object plist[14];
8752     int i;
8753
8754     for (i = 0; i < coding_arg_max; i++)
8755       args[i] = Qnil;
8756
8757     plist[0] = intern (":name");
8758     plist[1] = args[coding_arg_name] = Qno_conversion;
8759     plist[2] = intern (":mnemonic");
8760     plist[3] = args[coding_arg_mnemonic] = make_number ('=');
8761     plist[4] = intern (":coding-type");
8762     plist[5] = args[coding_arg_coding_type] = Qraw_text;
8763     plist[6] = intern (":ascii-compatible-p");
8764     plist[7] = args[coding_arg_ascii_compatible_p] = Qt;
8765     plist[8] = intern (":default-char");
8766     plist[9] = args[coding_arg_default_char] = make_number (0);
8767     plist[10] = intern (":docstring");
8768     plist[11] = build_string ("Do no conversion.\n\
8769 \n\
8770 When you visit a file with this coding, the file is read into a\n\
8771 unibyte buffer as is, thus each byte of a file is treated as a\n\
8772 character.");
8773     plist[12] = intern (":eol-type");
8774     plist[13] = args[coding_arg_eol_type] = Qunix;
8775     args[coding_arg_plist] = Flist (14, plist);
8776     Fdefine_coding_system_internal (coding_arg_max, args);
8777   }
8778
8779   setup_coding_system (Qno_conversion, &keyboard_coding);
8780   setup_coding_system (Qno_conversion, &terminal_coding);
8781   setup_coding_system (Qno_conversion, &safe_terminal_coding);
8782 }
8783
8784 char *
8785 emacs_strerror (error_number)
8786      int error_number;
8787 {
8788   char *str;
8789
8790   synchronize_system_messages_locale ();
8791   str = strerror (error_number);
8792
8793   if (! NILP (Vlocale_coding_system))
8794     {
8795       Lisp_Object dec = code_convert_string_norecord (build_string (str),
8796                                                       Vlocale_coding_system,
8797                                                       0);
8798       str = (char *) XSTRING (dec)->data;
8799     }
8800
8801   return str;
8802 }
8803
8804 #endif /* emacs */