src/coding.c

   1 /* Coding system handler (conversion, detection, etc).
   2    Copyright (C) 1995, 1997, 1998 Electrotechnical Laboratory, JAPAN.
   3    Licensed to the Free Software Foundation.
   4    Copyright (C) 2001, 2002 Free Software Foundation, Inc.
   5    Copyright (C) 2001, 2002
   6      National Institute of Advanced Industrial Science and Technology (AIST)
   7      Registration Number H13PRO009
   8
   9 This file is part of GNU Emacs.
  10
  11 GNU Emacs is free software; you can redistribute it and/or modify
  12 it under the terms of the GNU General Public License as published by
  13 the Free Software Foundation; either version 2, or (at your option)
  14 any later version.
  15
  16 GNU Emacs is distributed in the hope that it will be useful,
  17 but WITHOUT ANY WARRANTY; without even the implied warranty of
  18 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  19 GNU General Public License for more details.
  20
  21 You should have received a copy of the GNU General Public License
  22 along with GNU Emacs; see the file COPYING.  If not, write to
  23 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
  24 Boston, MA 02111-1307, USA.  */
  25
  26 /*** TABLE OF CONTENTS ***
  27
  28   0. General comments
  29   1. Preamble
  30   2. Emacs' internal format (emacs-utf-8) handlers
  31   3. UTF-8 handlers
  32   4. UTF-16 handlers
  33   5. Charset-base coding systems handlers
  34   6. emacs-mule (old Emacs' internal format) handlers
  35   7. ISO2022 handlers
  36   8. Shift-JIS and BIG5 handlers
  37   9. CCL handlers
  38   10. C library functions
  39   11. Emacs Lisp library functions
  40   12. Postamble
  41
  42 */
  43
  44 /*** 0. General comments ***
  45
  46
  47 CODING SYSTEM
  48
  49   A coding system is an object for an encoding mechanism that contains
  50   information about how to convert byte sequences to character
  51   sequences and vice versa.  When we say "decode", it means converting
  52   a byte sequence of a specific coding system into a character
  53   sequence that is represented by Emacs' internal coding system
  54   `emacs-utf-8', and when we say "encode", it means converting a
  55   character sequence of emacs-utf-8 to a byte sequence of a specific
  56   coding system.
  57
  58   In Emacs Lisp, a coding system is represented by a Lisp symbol.  In
  59   C level, a coding system is represented by a vector of attributes
  60   stored in the hash table Vcharset_hash_table.  The conversion from
  61   coding system symbol to attributes vector is done by looking up
  62   Vcharset_hash_table by the symbol.
  63
  64   Coding systems are classified into the following types depending on
  65   the encoding mechanism.  Here's a brief description of the types.
  66
  67   o UTF-8
  68
  69   o UTF-16
  70
  71   o Charset-base coding system
  72
  73   A coding system defined by one or more (coded) character sets.
  74   Decoding and encoding are done by a code converter defined for each
  75   character set.
  76
  77   o Old Emacs internal format (emacs-mule)
  78
  79   The coding system adopted by old versions of Emacs (20 and 21).
  80
  81   o ISO2022-base coding system
  82
  83   The most famous coding system for multiple character sets.  X's
  84   Compound Text, various EUCs (Extended Unix Code), and coding systems
  85   used in the Internet communication such as ISO-2022-JP are all
  86   variants of ISO2022.
  87
  88   o SJIS (or Shift-JIS or MS-Kanji-Code)
  89
  90   A coding system to encode character sets: ASCII, JISX0201, and
  91   JISX0208.  Widely used for PC's in Japan.  Details are described in
  92   section 8.
  93
  94   o BIG5
  95
  96   A coding system to encode character sets: ASCII and Big5.  Widely
  97   used for Chinese (mainly in Taiwan and Hong Kong).  Details are
  98   described in section 8.  In this file, when we write "big5" (all
  99   lowercase), we mean the coding system, and when we write "Big5"
 100   (capitalized), we mean the character set.
 101
 102   o CCL
 103
 104   If a user wants to decode/encode text encoded in a coding system
 105   not listed above, he can supply a decoder and an encoder for it in
 106   CCL (Code Conversion Language) programs.  Emacs executes the CCL
 107   program while decoding/encoding.
 108
 109   o Raw-text
 110
 111   A coding system for text containing raw eight-bit data.  Emacs
 112   treats each byte of source text as a character (except for
 113   end-of-line conversion).
 114
 115   o No-conversion
 116
 117   Like raw text, but don't do end-of-line conversion.
 118
 119
 120 END-OF-LINE FORMAT
 121
 122   How text end-of-line is encoded depends on operating system.  For
 123   instance, Unix's format is just one byte of LF (line-feed) code,
 124   whereas DOS's format is two-byte sequence of `carriage-return' and
 125   `line-feed' codes.  MacOS's format is usually one byte of
 126   `carriage-return'.
 127
 128   Since text character encoding and end-of-line encoding are
 129   independent, any coding system described above can take any format
 130   of end-of-line (except for no-conversion).
 131
 132 STRUCT CODING_SYSTEM
 133
 134   Before using a coding system for code conversion (i.e. decoding and
 135   encoding), we setup a structure of type `struct coding_system'.
 136   This structure keeps various information about a specific code
 137   conversion (e.g. the location of source and destination data).
 138
 139 */
 140
 141 /* COMMON MACROS */
 142
 143
 144 /*** GENERAL NOTES on `detect_coding_XXX ()' functions ***
 145
 146   These functions check if a byte sequence specified as a source in
 147   CODING conforms to the format of XXX, and update the members of
 148   DETECT_INFO.
 149
 150   Return 1 if the byte sequence conforms to XXX, otherwise return 0.
 151
 152   Below is the template of these functions.  */
 153
 154 #if 0
 155 static int
 156 detect_coding_XXX (coding, detect_info)
 157      struct coding_system *coding;
 158      struct coding_detection_info *detect_info;
 159 {
 160   unsigned char *src = coding->source;
 161   unsigned char *src_end = coding->source + coding->src_bytes;
 162   int multibytep = coding->src_multibyte;
 163   int consumed_chars = 0;
 164   int found = 0;
 165   ...;
 166
 167   while (1)
 168     {
 169       /* Get one byte from the source.  If the souce is exausted, jump
 170          to no_more_source:.  */
 171       ONE_MORE_BYTE (c);
 172
 173       if (! __C_conforms_to_XXX___ (c))
 174         break;
 175       if (! __C_strongly_suggests_XXX__ (c))
 176         found = CATEGORY_MASK_XXX;
 177     }
 178   /* The byte sequence is invalid for XXX.  */
 179   detect_info->rejected |= CATEGORY_MASK_XXX;
 180   return 0;
 181
 182  no_more_source:
 183   /* The source exausted successfully.  */
 184   detect_info->found |= found;
 185   return 1;
 186 }
 187 #endif
 188
 189 /*** GENERAL NOTES on `decode_coding_XXX ()' functions ***
 190
 191   These functions decode a byte sequence specified as a source by
 192   CODING.  The resulting multibyte text goes to a place pointed to by
 193   CODING->charbuf, the length of which should not exceed
 194   CODING->charbuf_size;
 195
 196   These functions set the information of original and decoded texts in
 197   CODING->consumed, CODING->consumed_char, and CODING->charbuf_used.
 198   They also set CODING->result to one of CODING_RESULT_XXX indicating
 199   how the decoding is finished.
 200
 201   Below is the template of these functions.  */
 202
 203 #if 0
 204 static void
 205 decode_coding_XXXX (coding)
 206      struct coding_system *coding;
 207 {
 208   unsigned char *src = coding->source + coding->consumed;
 209   unsigned char *src_end = coding->source + coding->src_bytes;
 210   /* SRC_BASE remembers the start position in source in each loop.
 211      The loop will be exited when there's not enough source code, or
 212      when there's no room in CHARBUF for a decoded character.  */
 213   unsigned char *src_base;
 214   /* A buffer to produce decoded characters.  */
 215   int *charbuf = coding->charbuf;
 216   int *charbuf_end = charbuf + coding->charbuf_size;
 217   int multibytep = coding->src_multibyte;
 218
 219   while (1)
 220     {
 221       src_base = src;
 222       if (charbuf < charbuf_end)
 223         /* No more room to produce a decoded character.  */
 224         break;
 225       ONE_MORE_BYTE (c);
 226       /* Decode it. */
 227     }
 228
 229  no_more_source:
 230   if (src_base < src_end
 231       && coding->mode & CODING_MODE_LAST_BLOCK)
 232     /* If the source ends by partial bytes to construct a character,
 233        treat them as eight-bit raw data.  */
 234     while (src_base < src_end && charbuf < charbuf_end)
 235       *charbuf++ = *src_base++;
 236   /* Remember how many bytes and characters we consumed.  If the
 237      source is multibyte, the bytes and chars are not identical.  */
 238   coding->consumed = coding->consumed_char = src_base - coding->source;
 239   /* Remember how many characters we produced.  */
 240   coding->charbuf_used = charbuf - coding->charbuf;
 241 }
 242 #endif
 243
 244 /*** GENERAL NOTES on `encode_coding_XXX ()' functions ***
 245
 246   These functions encode SRC_BYTES length text at SOURCE of Emacs'
 247   internal multibyte format by CODING.  The resulting byte sequence
 248   goes to a place pointed to by DESTINATION, the length of which
 249   should not exceed DST_BYTES.
 250
 251   These functions set the information of original and encoded texts in
 252   the members produced, produced_char, consumed, and consumed_char of
 253   the structure *CODING.  They also set the member result to one of
 254   CODING_RESULT_XXX indicating how the encoding finished.
 255
 256   DST_BYTES zero means that source area and destination area are
 257   overlapped, which means that we can produce a encoded text until it
 258   reaches at the head of not-yet-encoded source text.
 259
 260   Below is a template of these functions.  */
 261 #if 0
 262 static void
 263 encode_coding_XXX (coding)
 264      struct coding_system *coding;
 265 {
 266   int multibytep = coding->dst_multibyte;
 267   int *charbuf = coding->charbuf;
 268   int *charbuf_end = charbuf->charbuf + coding->charbuf_used;
 269   unsigned char *dst = coding->destination + coding->produced;
 270   unsigned char *dst_end = coding->destination + coding->dst_bytes;
 271   unsigned char *adjusted_dst_end = dst_end - _MAX_BYTES_PRODUCED_IN_LOOP_;
 272   int produced_chars = 0;
 273
 274   for (; charbuf < charbuf_end && dst < adjusted_dst_end; charbuf++)
 275     {
 276       int c = *charbuf;
 277       /* Encode C into DST, and increment DST.  */
 278     }
 279  label_no_more_destination:
 280   /* How many chars and bytes we produced.  */
 281   coding->produced_char += produced_chars;
 282   coding->produced = dst - coding->destination;
 283 }
 284 #endif
 285
 286 \f
 287 /*** 1. Preamble ***/
 288
 289 #include <config.h>
 290 #include <stdio.h>
 291
 292 #include "lisp.h"
 293 #include "buffer.h"
 294 #include "character.h"
 295 #include "charset.h"
 296 #include "ccl.h"
 297 #include "composite.h"
 298 #include "coding.h"
 299 #include "window.h"
 300
 301 Lisp_Object Vcoding_system_hash_table;
 302
 303 Lisp_Object Qcoding_system, Qcoding_aliases, Qeol_type;
 304 Lisp_Object Qunix, Qdos;
 305 extern Lisp_Object Qmac;        /* frame.c */
 306 Lisp_Object Qbuffer_file_coding_system;
 307 Lisp_Object Qpost_read_conversion, Qpre_write_conversion;
 308 Lisp_Object Qdefault_char;
 309 Lisp_Object Qno_conversion, Qundecided;
 310 Lisp_Object Qcharset, Qiso_2022, Qutf_8, Qutf_16, Qshift_jis, Qbig5;
 311 Lisp_Object Qsignature, Qendian, Qbig, Qlittle;
 312 Lisp_Object Qcoding_system_history;
 313 Lisp_Object Qvalid_codes;
 314
 315 extern Lisp_Object Qinsert_file_contents, Qwrite_region;
 316 Lisp_Object Qcall_process, Qcall_process_region, Qprocess_argument;
 317 Lisp_Object Qstart_process, Qopen_network_stream;
 318 Lisp_Object Qtarget_idx;
 319
 320 Lisp_Object Vselect_safe_coding_system_function;
 321
 322 /* Mnemonic string for each format of end-of-line.  */
 323 Lisp_Object eol_mnemonic_unix, eol_mnemonic_dos, eol_mnemonic_mac;
 324 /* Mnemonic string to indicate format of end-of-line is not yet
 325    decided.  */
 326 Lisp_Object eol_mnemonic_undecided;
 327
 328 #ifdef emacs
 329
 330 Lisp_Object Vcoding_system_list, Vcoding_system_alist;
 331
 332 Lisp_Object Qcoding_system_p, Qcoding_system_error;
 333
 334 /* Coding system emacs-mule and raw-text are for converting only
 335    end-of-line format.  */
 336 Lisp_Object Qemacs_mule, Qraw_text;
 337
 338 /* Coding-systems are handed between Emacs Lisp programs and C internal
 339    routines by the following three variables.  */
 340 /* Coding-system for reading files and receiving data from process.  */
 341 Lisp_Object Vcoding_system_for_read;
 342 /* Coding-system for writing files and sending data to process.  */
 343 Lisp_Object Vcoding_system_for_write;
 344 /* Coding-system actually used in the latest I/O.  */
 345 Lisp_Object Vlast_coding_system_used;
 346
 347 /* A vector of length 256 which contains information about special
 348    Latin codes (especially for dealing with Microsoft codes).  */
 349 Lisp_Object Vlatin_extra_code_table;
 350
 351 /* Flag to inhibit code conversion of end-of-line format.  */
 352 int inhibit_eol_conversion;
 353
 354 /* Flag to inhibit ISO2022 escape sequence detection.  */
 355 int inhibit_iso_escape_detection;
 356
 357 /* Flag to make buffer-file-coding-system inherit from process-coding.  */
 358 int inherit_process_coding_system;
 359
 360 /* Coding system to be used to encode text for terminal display.  */
 361 struct coding_system terminal_coding;
 362
 363 /* Coding system to be used to encode text for terminal display when
 364    terminal coding system is nil.  */
 365 struct coding_system safe_terminal_coding;
 366
 367 /* Coding system of what is sent from terminal keyboard.  */
 368 struct coding_system keyboard_coding;
 369
 370 Lisp_Object Vfile_coding_system_alist;
 371 Lisp_Object Vprocess_coding_system_alist;
 372 Lisp_Object Vnetwork_coding_system_alist;
 373
 374 Lisp_Object Vlocale_coding_system;
 375
 376 #endif /* emacs */
 377
 378 /* Flag to tell if we look up translation table on character code
 379    conversion.  */
 380 Lisp_Object Venable_character_translation;
 381 /* Standard translation table to look up on decoding (reading).  */
 382 Lisp_Object Vstandard_translation_table_for_decode;
 383 /* Standard translation table to look up on encoding (writing).  */
 384 Lisp_Object Vstandard_translation_table_for_encode;
 385
 386 Lisp_Object Qtranslation_table;
 387 Lisp_Object Qtranslation_table_id;
 388 Lisp_Object Qtranslation_table_for_decode;
 389 Lisp_Object Qtranslation_table_for_encode;
 390
 391 /* Alist of charsets vs revision number.  */
 392 static Lisp_Object Vcharset_revision_table;
 393
 394 /* Default coding systems used for process I/O.  */
 395 Lisp_Object Vdefault_process_coding_system;
 396
 397 /* Global flag to tell that we can't call post-read-conversion and
 398    pre-write-conversion functions.  Usually the value is zero, but it
 399    is set to 1 temporarily while such functions are running.  This is
 400    to avoid infinite recursive call.  */
 401 static int inhibit_pre_post_conversion;
 402
 403 /* Two special coding systems.  */
 404 Lisp_Object Vsjis_coding_system;
 405 Lisp_Object Vbig5_coding_system;
 406
 407
 408 static int detect_coding_utf_8 P_ ((struct coding_system *,
 409                                     struct coding_detection_info *info));
 410 static void decode_coding_utf_8 P_ ((struct coding_system *));
 411 static int encode_coding_utf_8 P_ ((struct coding_system *));
 412
 413 static int detect_coding_utf_16 P_ ((struct coding_system *,
 414                                      struct coding_detection_info *info));
 415 static void decode_coding_utf_16 P_ ((struct coding_system *));
 416 static int encode_coding_utf_16 P_ ((struct coding_system *));
 417
 418 static int detect_coding_iso_2022 P_ ((struct coding_system *,
 419                                        struct coding_detection_info *info));
 420 static void decode_coding_iso_2022 P_ ((struct coding_system *));
 421 static int encode_coding_iso_2022 P_ ((struct coding_system *));
 422
 423 static int detect_coding_emacs_mule P_ ((struct coding_system *,
 424                                          struct coding_detection_info *info));
 425 static void decode_coding_emacs_mule P_ ((struct coding_system *));
 426 static int encode_coding_emacs_mule P_ ((struct coding_system *));
 427
 428 static int detect_coding_sjis P_ ((struct coding_system *,
 429                                    struct coding_detection_info *info));
 430 static void decode_coding_sjis P_ ((struct coding_system *));
 431 static int encode_coding_sjis P_ ((struct coding_system *));
 432
 433 static int detect_coding_big5 P_ ((struct coding_system *,
 434                                    struct coding_detection_info *info));
 435 static void decode_coding_big5 P_ ((struct coding_system *));
 436 static int encode_coding_big5 P_ ((struct coding_system *));
 437
 438 static int detect_coding_ccl P_ ((struct coding_system *,
 439                                   struct coding_detection_info *info));
 440 static void decode_coding_ccl P_ ((struct coding_system *));
 441 static int encode_coding_ccl P_ ((struct coding_system *));
 442
 443 static void decode_coding_raw_text P_ ((struct coding_system *));
 444 static int encode_coding_raw_text P_ ((struct coding_system *));
 445
 446
 447 /* ISO2022 section */
 448
 449 #define CODING_ISO_INITIAL(coding, reg)                 \
 450   (XINT (AREF (AREF (CODING_ID_ATTRS ((coding)->id),    \
 451                      coding_attr_iso_initial),          \
 452                reg)))
 453
 454
 455 #define CODING_ISO_REQUEST(coding, charset_id)  \
 456   ((charset_id <= (coding)->max_charset_id      \
 457     ? (coding)->safe_charsets[charset_id]       \
 458     : -1))
 459
 460
 461 #define CODING_ISO_FLAGS(coding)        \
 462   ((coding)->spec.iso_2022.flags)
 463 #define CODING_ISO_DESIGNATION(coding, reg)     \
 464   ((coding)->spec.iso_2022.current_designation[reg])
 465 #define CODING_ISO_INVOCATION(coding, plane)    \
 466   ((coding)->spec.iso_2022.current_invocation[plane])
 467 #define CODING_ISO_SINGLE_SHIFTING(coding)      \
 468   ((coding)->spec.iso_2022.single_shifting)
 469 #define CODING_ISO_BOL(coding)  \
 470   ((coding)->spec.iso_2022.bol)
 471 #define CODING_ISO_INVOKED_CHARSET(coding, plane)       \
 472   CODING_ISO_DESIGNATION ((coding), CODING_ISO_INVOCATION ((coding), (plane)))
 473
 474 /* Control characters of ISO2022.  */
 475                         /* code */      /* function */
 476 #define ISO_CODE_LF     0x0A            /* line-feed */
 477 #define ISO_CODE_CR     0x0D            /* carriage-return */
 478 #define ISO_CODE_SO     0x0E            /* shift-out */
 479 #define ISO_CODE_SI     0x0F            /* shift-in */
 480 #define ISO_CODE_SS2_7  0x19            /* single-shift-2 for 7-bit code */
 481 #define ISO_CODE_ESC    0x1B            /* escape */
 482 #define ISO_CODE_SS2    0x8E            /* single-shift-2 */
 483 #define ISO_CODE_SS3    0x8F            /* single-shift-3 */
 484 #define ISO_CODE_CSI    0x9B            /* control-sequence-introducer */
 485
 486 /* All code (1-byte) of ISO2022 is classified into one of the
 487    followings.  */
 488 enum iso_code_class_type
 489   {
 490     ISO_control_0,              /* Control codes in the range
 491                                    0x00..0x1F and 0x7F, except for the
 492                                    following 5 codes.  */
 493     ISO_carriage_return,        /* ISO_CODE_CR (0x0D) */
 494     ISO_shift_out,              /* ISO_CODE_SO (0x0E) */
 495     ISO_shift_in,               /* ISO_CODE_SI (0x0F) */
 496     ISO_single_shift_2_7,       /* ISO_CODE_SS2_7 (0x19) */
 497     ISO_escape,                 /* ISO_CODE_SO (0x1B) */
 498     ISO_control_1,              /* Control codes in the range
 499                                    0x80..0x9F, except for the
 500                                    following 3 codes.  */
 501     ISO_single_shift_2,         /* ISO_CODE_SS2 (0x8E) */
 502     ISO_single_shift_3,         /* ISO_CODE_SS3 (0x8F) */
 503     ISO_control_sequence_introducer, /* ISO_CODE_CSI (0x9B) */
 504     ISO_0x20_or_0x7F,           /* Codes of the values 0x20 or 0x7F.  */
 505     ISO_graphic_plane_0,        /* Graphic codes in the range 0x21..0x7E.  */
 506     ISO_0xA0_or_0xFF,           /* Codes of the values 0xA0 or 0xFF.  */
 507     ISO_graphic_plane_1         /* Graphic codes in the range 0xA1..0xFE.  */
 508   };
 509
 510 /** The macros CODING_ISO_FLAG_XXX defines a flag bit of the
 511     `iso-flags' attribute of an iso2022 coding system.  */
 512
 513 /* If set, produce long-form designation sequence (e.g. ESC $ ( A)
 514    instead of the correct short-form sequence (e.g. ESC $ A).  */
 515 #define CODING_ISO_FLAG_LONG_FORM       0x0001
 516
 517 /* If set, reset graphic planes and registers at end-of-line to the
 518    initial state.  */
 519 #define CODING_ISO_FLAG_RESET_AT_EOL    0x0002
 520
 521 /* If set, reset graphic planes and registers before any control
 522    characters to the initial state.  */
 523 #define CODING_ISO_FLAG_RESET_AT_CNTL   0x0004
 524
 525 /* If set, encode by 7-bit environment.  */
 526 #define CODING_ISO_FLAG_SEVEN_BITS      0x0008
 527
 528 /* If set, use locking-shift function.  */
 529 #define CODING_ISO_FLAG_LOCKING_SHIFT   0x0010
 530
 531 /* If set, use single-shift function.  Overwrite
 532    CODING_ISO_FLAG_LOCKING_SHIFT.  */
 533 #define CODING_ISO_FLAG_SINGLE_SHIFT    0x0020
 534
 535 /* If set, use designation escape sequence.  */
 536 #define CODING_ISO_FLAG_DESIGNATION     0x0040
 537
 538 /* If set, produce revision number sequence.  */
 539 #define CODING_ISO_FLAG_REVISION        0x0080
 540
 541 /* If set, produce ISO6429's direction specifying sequence.  */
 542 #define CODING_ISO_FLAG_DIRECTION       0x0100
 543
 544 /* If set, assume designation states are reset at beginning of line on
 545    output.  */
 546 #define CODING_ISO_FLAG_INIT_AT_BOL     0x0200
 547
 548 /* If set, designation sequence should be placed at beginning of line
 549    on output.  */
 550 #define CODING_ISO_FLAG_DESIGNATE_AT_BOL 0x0400
 551
 552 /* If set, do not encode unsafe charactes on output.  */
 553 #define CODING_ISO_FLAG_SAFE            0x0800
 554
 555 /* If set, extra latin codes (128..159) are accepted as a valid code
 556    on input.  */
 557 #define CODING_ISO_FLAG_LATIN_EXTRA     0x1000
 558
 559 #define CODING_ISO_FLAG_COMPOSITION     0x2000
 560
 561 #define CODING_ISO_FLAG_EUC_TW_SHIFT    0x4000
 562
 563 #define CODING_ISO_FLAG_USE_ROMAN       0x8000
 564
 565 #define CODING_ISO_FLAG_USE_OLDJIS      0x10000
 566
 567 #define CODING_ISO_FLAG_FULL_SUPPORT    0x100000
 568
 569 /* A character to be produced on output if encoding of the original
 570    character is prohibited by CODING_ISO_FLAG_SAFE.  */
 571 #define CODING_INHIBIT_CHARACTER_SUBSTITUTION  '?'
 572
 573
 574 /* UTF-16 section */
 575 #define CODING_UTF_16_BOM(coding)       \
 576   ((coding)->spec.utf_16.bom)
 577
 578 #define CODING_UTF_16_ENDIAN(coding)    \
 579   ((coding)->spec.utf_16.endian)
 580
 581 #define CODING_UTF_16_SURROGATE(coding) \
 582   ((coding)->spec.utf_16.surrogate)
 583
 584
 585 /* CCL section */
 586 #define CODING_CCL_DECODER(coding)      \
 587   AREF (CODING_ID_ATTRS ((coding)->id), coding_attr_ccl_decoder)
 588 #define CODING_CCL_ENCODER(coding)      \
 589   AREF (CODING_ID_ATTRS ((coding)->id), coding_attr_ccl_encoder)
 590 #define CODING_CCL_VALIDS(coding)                                          \
 591   (XSTRING (AREF (CODING_ID_ATTRS ((coding)->id), coding_attr_ccl_valids)) \
 592    ->data)
 593
 594 /* Index for each coding category in `coding_categories' */
 595
 596 enum coding_category
 597   {
 598     coding_category_iso_7,
 599     coding_category_iso_7_tight,
 600     coding_category_iso_8_1,
 601     coding_category_iso_8_2,
 602     coding_category_iso_7_else,
 603     coding_category_iso_8_else,
 604     coding_category_utf_8,
 605     coding_category_utf_16_auto,
 606     coding_category_utf_16_be,
 607     coding_category_utf_16_le,
 608     coding_category_utf_16_be_nosig,
 609     coding_category_utf_16_le_nosig,
 610     coding_category_charset,
 611     coding_category_sjis,
 612     coding_category_big5,
 613     coding_category_ccl,
 614     coding_category_emacs_mule,
 615     /* All above are targets of code detection.  */
 616     coding_category_raw_text,
 617     coding_category_undecided,
 618     coding_category_max
 619   };
 620
 621 /* Definitions of flag bits used in detect_coding_XXXX.  */
 622 #define CATEGORY_MASK_ISO_7             (1 << coding_category_iso_7)
 623 #define CATEGORY_MASK_ISO_7_TIGHT       (1 << coding_category_iso_7_tight)
 624 #define CATEGORY_MASK_ISO_8_1           (1 << coding_category_iso_8_1)
 625 #define CATEGORY_MASK_ISO_8_2           (1 << coding_category_iso_8_2)
 626 #define CATEGORY_MASK_ISO_7_ELSE        (1 << coding_category_iso_7_else)
 627 #define CATEGORY_MASK_ISO_8_ELSE        (1 << coding_category_iso_8_else)
 628 #define CATEGORY_MASK_UTF_8             (1 << coding_category_utf_8)
 629 #define CATEGORY_MASK_UTF_16_BE         (1 << coding_category_utf_16_be)
 630 #define CATEGORY_MASK_UTF_16_LE         (1 << coding_category_utf_16_le)
 631 #define CATEGORY_MASK_UTF_16_BE_NOSIG   (1 << coding_category_utf_16_be_nosig)
 632 #define CATEGORY_MASK_UTF_16_LE_NOSIG   (1 << coding_category_utf_16_le_nosig)
 633 #define CATEGORY_MASK_CHARSET           (1 << coding_category_charset)
 634 #define CATEGORY_MASK_SJIS              (1 << coding_category_sjis)
 635 #define CATEGORY_MASK_BIG5              (1 << coding_category_big5)
 636 #define CATEGORY_MASK_CCL               (1 << coding_category_ccl)
 637 #define CATEGORY_MASK_EMACS_MULE        (1 << coding_category_emacs_mule)
 638 #define CATEGORY_MASK_RAW_TEXT          (1 << coding_category_raw_text)
 639
 640 /* This value is returned if detect_coding_mask () find nothing other
 641    than ASCII characters.  */
 642 #define CATEGORY_MASK_ANY               \
 643   (CATEGORY_MASK_ISO_7                  \
 644    | CATEGORY_MASK_ISO_7_TIGHT          \
 645    | CATEGORY_MASK_ISO_8_1              \
 646    | CATEGORY_MASK_ISO_8_2              \
 647    | CATEGORY_MASK_ISO_7_ELSE           \
 648    | CATEGORY_MASK_ISO_8_ELSE           \
 649    | CATEGORY_MASK_UTF_8                \
 650    | CATEGORY_MASK_UTF_16_BE            \
 651    | CATEGORY_MASK_UTF_16_LE            \
 652    | CATEGORY_MASK_UTF_16_BE_NOSIG      \
 653    | CATEGORY_MASK_UTF_16_LE_NOSIG      \
 654    | CATEGORY_MASK_CHARSET              \
 655    | CATEGORY_MASK_SJIS                 \
 656    | CATEGORY_MASK_BIG5                 \
 657    | CATEGORY_MASK_CCL                  \
 658    | CATEGORY_MASK_EMACS_MULE)
 659
 660
 661 #define CATEGORY_MASK_ISO_7BIT \
 662   (CATEGORY_MASK_ISO_7 | CATEGORY_MASK_ISO_7_TIGHT)
 663
 664 #define CATEGORY_MASK_ISO_8BIT \
 665   (CATEGORY_MASK_ISO_8_1 | CATEGORY_MASK_ISO_8_2)
 666
 667 #define CATEGORY_MASK_ISO_ELSE \
 668   (CATEGORY_MASK_ISO_7_ELSE | CATEGORY_MASK_ISO_8_ELSE)
 669
 670 #define CATEGORY_MASK_ISO_ESCAPE        \
 671   (CATEGORY_MASK_ISO_7                  \
 672    | CATEGORY_MASK_ISO_7_TIGHT          \
 673    | CATEGORY_MASK_ISO_7_ELSE           \
 674    | CATEGORY_MASK_ISO_8_ELSE)
 675
 676 #define CATEGORY_MASK_ISO       \
 677   (  CATEGORY_MASK_ISO_7BIT     \
 678      | CATEGORY_MASK_ISO_8BIT   \
 679      | CATEGORY_MASK_ISO_ELSE)
 680
 681 #define CATEGORY_MASK_UTF_16            \
 682   (CATEGORY_MASK_UTF_16_BE              \
 683    | CATEGORY_MASK_UTF_16_LE            \
 684    | CATEGORY_MASK_UTF_16_BE_NOSIG      \
 685    | CATEGORY_MASK_UTF_16_LE_NOSIG)
 686
 687
 688 /* List of symbols `coding-category-xxx' ordered by priority.  This
 689    variable is exposed to Emacs Lisp.  */
 690 static Lisp_Object Vcoding_category_list;
 691
 692 /* Table of coding categories (Lisp symbols).  This variable is for
 693    internal use oly.  */
 694 static Lisp_Object Vcoding_category_table;
 695
 696 /* Table of coding-categories ordered by priority.  */
 697 static enum coding_category coding_priorities[coding_category_max];
 698
 699 /* Nth element is a coding context for the coding system bound to the
 700    Nth coding category.  */
 701 static struct coding_system coding_categories[coding_category_max];
 702
 703 static int detected_mask[coding_category_raw_text] =
 704   { CATEGORY_MASK_ISO,
 705     CATEGORY_MASK_ISO,
 706     CATEGORY_MASK_ISO,
 707     CATEGORY_MASK_ISO,
 708     CATEGORY_MASK_ISO,
 709     CATEGORY_MASK_ISO,
 710     CATEGORY_MASK_UTF_8,
 711     CATEGORY_MASK_UTF_16,
 712     CATEGORY_MASK_UTF_16,
 713     CATEGORY_MASK_UTF_16,
 714     CATEGORY_MASK_UTF_16,
 715     CATEGORY_MASK_UTF_16,
 716     CATEGORY_MASK_CHARSET,
 717     CATEGORY_MASK_SJIS,
 718     CATEGORY_MASK_BIG5,
 719     CATEGORY_MASK_CCL,
 720     CATEGORY_MASK_EMACS_MULE
 721   };
 722
 723 /*** Commonly used macros and functions ***/
 724
 725 #ifndef min
 726 #define min(a, b) ((a) < (b) ? (a) : (b))
 727 #endif
 728 #ifndef max
 729 #define max(a, b) ((a) > (b) ? (a) : (b))
 730 #endif
 731
 732 #define CODING_GET_INFO(coding, attrs, eol_type, charset_list)  \
 733   do {                                                          \
 734     attrs = CODING_ID_ATTRS (coding->id);                       \
 735     eol_type = CODING_ID_EOL_TYPE (coding->id);                 \
 736     if (VECTORP (eol_type))                                     \
 737       eol_type = Qunix;                                         \
 738     charset_list = CODING_ATTR_CHARSET_LIST (attrs);            \
 739   } while (0)
 740
 741
 742 /* Safely get one byte from the source text pointed by SRC which ends
 743    at SRC_END, and set C to that byte.  If there are not enough bytes
 744    in the source, it jumps to `no_more_source'.  The caller
 745    should declare and set these variables appropriately in advance:
 746         src, src_end, multibytep
 747 */
 748
 749 #define ONE_MORE_BYTE(c)                                        \
 750   do {                                                          \
 751     if (src == src_end)                                         \
 752       {                                                         \
 753         if (src_base < src)                                     \
 754           coding->result = CODING_RESULT_INSUFFICIENT_SRC;      \
 755         goto no_more_source;                                    \
 756       }                                                         \
 757     c = *src++;                                                 \
 758     if (multibytep && (c & 0x80))                               \
 759       {                                                         \
 760         if ((c & 0xFE) != 0xC0)                                 \
 761           error ("Undecodable char found");                     \
 762         c = ((c & 1) << 6) | *src++;                            \
 763       }                                                         \
 764     consumed_chars++;                                           \
 765   } while (0)
 766
 767
 768 #define ONE_MORE_BYTE_NO_CHECK(c)               \
 769   do {                                          \
 770     c = *src++;                                 \
 771     if (multibytep && (c & 0x80))               \
 772       {                                         \
 773         if ((c & 0xFE) != 0xC0)                 \
 774           error ("Undecodable char found");     \
 775         c = ((c & 1) << 6) | *src++;            \
 776       }                                         \
 777     consumed_chars++;                           \
 778   } while (0)
 779
 780
 781 /* Store a byte C in the place pointed by DST and increment DST to the
 782    next free point, and increment PRODUCED_CHARS.  The caller should
 783    assure that C is 0..127, and declare and set the variable `dst'
 784    appropriately in advance.
 785 */
 786
 787
 788 #define EMIT_ONE_ASCII_BYTE(c)  \
 789   do {                          \
 790     produced_chars++;           \
 791     *dst++ = (c);               \
 792   } while (0)
 793
 794
 795 /* Like EMIT_ONE_ASCII_BYTE byt store two bytes; C1 and C2.  */
 796
 797 #define EMIT_TWO_ASCII_BYTES(c1, c2)    \
 798   do {                                  \
 799     produced_chars += 2;                \
 800     *dst++ = (c1), *dst++ = (c2);       \
 801   } while (0)
 802
 803
 804 /* Store a byte C in the place pointed by DST and increment DST to the
 805    next free point, and increment PRODUCED_CHARS.  If MULTIBYTEP is
 806    nonzero, store in an appropriate multibyte from.  The caller should
 807    declare and set the variables `dst' and `multibytep' appropriately
 808    in advance.  */
 809
 810 #define EMIT_ONE_BYTE(c)                \
 811   do {                                  \
 812     produced_chars++;                   \
 813     if (multibytep)                     \
 814       {                                 \
 815         int ch = (c);                   \
 816         if (ch >= 0x80)                 \
 817           ch = BYTE8_TO_CHAR (ch);      \
 818         CHAR_STRING_ADVANCE (ch, dst);  \
 819       }                                 \
 820     else                                \
 821       *dst++ = (c);                     \
 822   } while (0)
 823
 824
 825 /* Like EMIT_ONE_BYTE, but emit two bytes; C1 and C2.  */
 826
 827 #define EMIT_TWO_BYTES(c1, c2)          \
 828   do {                                  \
 829     produced_chars += 2;                \
 830     if (multibytep)                     \
 831       {                                 \
 832         int ch;                         \
 833                                         \
 834         ch = (c1);                      \
 835         if (ch >= 0x80)                 \
 836           ch = BYTE8_TO_CHAR (ch);      \
 837         CHAR_STRING_ADVANCE (ch, dst);  \
 838         ch = (c2);                      \
 839         if (ch >= 0x80)                 \
 840           ch = BYTE8_TO_CHAR (ch);      \
 841         CHAR_STRING_ADVANCE (ch, dst);  \
 842       }                                 \
 843     else                                \
 844       {                                 \
 845         *dst++ = (c1);                  \
 846         *dst++ = (c2);                  \
 847       }                                 \
 848   } while (0)
 849
 850
 851 #define EMIT_THREE_BYTES(c1, c2, c3)    \
 852   do {                                  \
 853     EMIT_ONE_BYTE (c1);                 \
 854     EMIT_TWO_BYTES (c2, c3);            \
 855   } while (0)
 856
 857
 858 #define EMIT_FOUR_BYTES(c1, c2, c3, c4)         \
 859   do {                                          \
 860     EMIT_TWO_BYTES (c1, c2);                    \
 861     EMIT_TWO_BYTES (c3, c4);                    \
 862   } while (0)
 863
 864
 865 #define CODING_DECODE_CHAR(coding, src, src_base, src_end, charset, code, c) \
 866   do {                                                                       \
 867     charset_map_loaded = 0;                                                  \
 868     c = DECODE_CHAR (charset, code);                                         \
 869     if (charset_map_loaded)                                                  \
 870       {                                                                      \
 871         unsigned char *orig = coding->source;                                \
 872         EMACS_INT offset;                                                    \
 873                                                                              \
 874         coding_set_source (coding);                                          \
 875         offset = coding->source - orig;                                      \
 876         src += offset;                                                       \
 877         src_base += offset;                                                  \
 878         src_end += offset;                                                   \
 879       }                                                                      \
 880   } while (0)
 881
 882
 883 #define ASSURE_DESTINATION(bytes)                               \
 884   do {                                                          \
 885     if (dst + (bytes) >= dst_end)                               \
 886       {                                                         \
 887         int more_bytes = charbuf_end - charbuf + (bytes);       \
 888                                                                 \
 889         dst = alloc_destination (coding, more_bytes, dst);      \
 890         dst_end = coding->destination + coding->dst_bytes;      \
 891       }                                                         \
 892   } while (0)
 893
 894
 895
 896 static void
 897 coding_set_source (coding)
 898      struct coding_system *coding;
 899 {
 900   if (BUFFERP (coding->src_object))
 901     {
 902       if (coding->src_pos < 0)
 903         coding->source = GAP_END_ADDR + coding->src_pos_byte;
 904       else
 905         {
 906           struct buffer *buf = XBUFFER (coding->src_object);
 907           EMACS_INT gpt_byte = BUF_GPT_BYTE (buf);
 908           unsigned char *beg_addr = BUF_BEG_ADDR (buf);
 909
 910           coding->source = beg_addr + coding->src_pos_byte - 1;
 911           if (coding->src_pos_byte >= gpt_byte)
 912             coding->source += BUF_GAP_SIZE (buf);
 913         }
 914     }
 915   else if (STRINGP (coding->src_object))
 916     {
 917       coding->source = (XSTRING (coding->src_object)->data
 918                         + coding->src_pos_byte);
 919     }
 920   else
 921     /* Otherwise, the source is C string and is never relocated
 922        automatically.  Thus we don't have to update anything.  */
 923     ;
 924 }
 925
 926 static void
 927 coding_set_destination (coding)
 928      struct coding_system *coding;
 929 {
 930   if (BUFFERP (coding->dst_object))
 931     {
 932       if (coding->src_pos < 0)
 933         {
 934           coding->destination = BEG_ADDR + coding->dst_pos_byte - 1;
 935           coding->dst_bytes = (GAP_END_ADDR
 936                                - (coding->src_bytes - coding->consumed)
 937                                - coding->destination);
 938         }
 939       else
 940         {
 941           /* We are sure that coding->dst_pos_byte is before the gap
 942              of the buffer. */
 943           coding->destination = (BUF_BEG_ADDR (XBUFFER (coding->dst_object))
 944                                  + coding->dst_pos_byte - 1);
 945           coding->dst_bytes = (BUF_GAP_END_ADDR (XBUFFER (coding->dst_object))
 946                                - coding->destination);
 947         }
 948     }
 949   else
 950     /* Otherwise, the destination is C string and is never relocated
 951        automatically.  Thus we don't have to update anything.  */
 952     ;
 953 }
 954
 955
 956 static void
 957 coding_alloc_by_realloc (coding, bytes)
 958      struct coding_system *coding;
 959      EMACS_INT bytes;
 960 {
 961   coding->destination = (unsigned char *) xrealloc (coding->destination,
 962                                                     coding->dst_bytes + bytes);
 963   coding->dst_bytes += bytes;
 964 }
 965
 966 static void
 967 coding_alloc_by_making_gap (coding, bytes)
 968      struct coding_system *coding;
 969      EMACS_INT bytes;
 970 {
 971   if (BUFFERP (coding->dst_object)
 972       && EQ (coding->src_object, coding->dst_object))
 973     {
 974       EMACS_INT add = coding->src_bytes - coding->consumed;
 975
 976       GAP_SIZE -= add; ZV += add; Z += add; ZV_BYTE += add; Z_BYTE += add;
 977       make_gap (bytes);
 978       GAP_SIZE += add; ZV -= add; Z -= add; ZV_BYTE -= add; Z_BYTE -= add;
 979     }
 980   else
 981     {
 982       Lisp_Object this_buffer;
 983
 984       this_buffer = Fcurrent_buffer ();
 985       set_buffer_internal (XBUFFER (coding->dst_object));
 986       make_gap (bytes);
 987       set_buffer_internal (XBUFFER (this_buffer));
 988     }
 989 }
 990
 991
 992 static unsigned char *
 993 alloc_destination (coding, nbytes, dst)
 994      struct coding_system *coding;
 995      int nbytes;
 996      unsigned char *dst;
 997 {
 998   EMACS_INT offset = dst - coding->destination;
 999
1000   if (BUFFERP (coding->dst_object))
1001     coding_alloc_by_making_gap (coding, nbytes);
1002   else
1003     coding_alloc_by_realloc (coding, nbytes);
1004   coding->result = CODING_RESULT_SUCCESS;
1005   coding_set_destination (coding);
1006   dst = coding->destination + offset;
1007   return dst;
1008 }
1009
1010 /** Macros for annotations.  */
1011
1012 /* Maximum length of annotation data (sum of annotations for
1013    composition and charset).  */
1014 #define MAX_ANNOTATION_LENGTH (5 + (MAX_COMPOSITION_COMPONENTS * 2) - 1 + 5)
1015
1016 /* An annotation data is stored in the array coding->charbuf in this
1017    format:
1018      [ -LENGTH ANNOTATION_MASK FROM TO ... ]
1019    LENGTH is the number of elements in the annotation.
1020    ANNOTATION_MASK is one of CODING_ANNOTATE_XXX_MASK.
1021    FROM and TO specify the range of text annotated.  They are relative
1022    to coding->src_pos (on encoding) or coding->dst_pos (on decoding).
1023
1024    The format of the following elements depend on ANNOTATION_MASK.
1025
1026    In the case of CODING_ANNOTATE_COMPOSITION_MASK, these elements
1027    follows:
1028      ... METHOD [ COMPOSITION-COMPONENTS ... ]
1029    METHOD is one of enum composition_method.
1030    Optionnal COMPOSITION-COMPONENTS are characters and composition
1031    rules.
1032
1033    In the case of CODING_ANNOTATE_CHARSET_MASK, one element CHARSET-ID
1034    follows.  */
1035
1036 #define ADD_ANNOTATION_DATA(buf, len, mask, from, to)   \
1037   do {                                                  \
1038     *(buf)++ = -(len);                                  \
1039     *(buf)++ = (mask);                                  \
1040     *(buf)++ = (from);                                  \
1041     *(buf)++ = (to);                                    \
1042     coding->annotated = 1;                              \
1043   } while (0);
1044
1045 #define ADD_COMPOSITION_DATA(buf, from, to, method)                           \
1046   do {                                                                        \
1047     ADD_ANNOTATION_DATA (buf, 5, CODING_ANNOTATE_COMPOSITION_MASK, from, to); \
1048     *buf++ = method;                                                          \
1049   } while (0)
1050
1051
1052 #define ADD_CHARSET_DATA(buf, from, to, id)                               \
1053   do {                                                                    \
1054     ADD_ANNOTATION_DATA (buf, 5, CODING_ANNOTATE_CHARSET_MASK, from, to); \
1055     *buf++ = id;                                                          \
1056   } while (0)
1057
1058 \f
1059 /*** 2. Emacs' internal format (emacs-utf-8) ***/
1060
1061
1062
1063 \f
1064 /*** 3. UTF-8 ***/
1065
1066 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
1067    Check if a text is encoded in UTF-8.  If it is, return 1, else
1068    return 0.  */
1069
1070 #define UTF_8_1_OCTET_P(c)         ((c) < 0x80)
1071 #define UTF_8_EXTRA_OCTET_P(c)     (((c) & 0xC0) == 0x80)
1072 #define UTF_8_2_OCTET_LEADING_P(c) (((c) & 0xE0) == 0xC0)
1073 #define UTF_8_3_OCTET_LEADING_P(c) (((c) & 0xF0) == 0xE0)
1074 #define UTF_8_4_OCTET_LEADING_P(c) (((c) & 0xF8) == 0xF0)
1075 #define UTF_8_5_OCTET_LEADING_P(c) (((c) & 0xFC) == 0xF8)
1076
1077 static int
1078 detect_coding_utf_8 (coding, detect_info)
1079      struct coding_system *coding;
1080      struct coding_detection_info *detect_info;
1081 {
1082   unsigned char *src = coding->source, *src_base = src;
1083   unsigned char *src_end = coding->source + coding->src_bytes;
1084   int multibytep = coding->src_multibyte;
1085   int consumed_chars = 0;
1086   int found = 0;
1087   int incomplete;
1088
1089   detect_info->checked |= CATEGORY_MASK_UTF_8;
1090   /* A coding system of this category is always ASCII compatible.  */
1091   src += coding->head_ascii;
1092
1093   while (1)
1094     {
1095       int c, c1, c2, c3, c4;
1096
1097       incomplete = 0;
1098       ONE_MORE_BYTE (c);
1099       if (UTF_8_1_OCTET_P (c))
1100         continue;
1101       incomplete = 1;
1102       ONE_MORE_BYTE (c1);
1103       if (! UTF_8_EXTRA_OCTET_P (c1))
1104         break;
1105       if (UTF_8_2_OCTET_LEADING_P (c))
1106         {
1107           found = CATEGORY_MASK_UTF_8;
1108           continue;
1109         }
1110       ONE_MORE_BYTE (c2);
1111       if (! UTF_8_EXTRA_OCTET_P (c2))
1112         break;
1113       if (UTF_8_3_OCTET_LEADING_P (c))
1114         {
1115           found = CATEGORY_MASK_UTF_8;
1116           continue;
1117         }
1118       ONE_MORE_BYTE (c3);
1119       if (! UTF_8_EXTRA_OCTET_P (c3))
1120         break;
1121       if (UTF_8_4_OCTET_LEADING_P (c))
1122         {
1123           found = CATEGORY_MASK_UTF_8;
1124           continue;
1125         }
1126       ONE_MORE_BYTE (c4);
1127       if (! UTF_8_EXTRA_OCTET_P (c4))
1128         break;
1129       if (UTF_8_5_OCTET_LEADING_P (c))
1130         {
1131           found = CATEGORY_MASK_UTF_8;
1132           continue;
1133         }
1134       break;
1135     }
1136   detect_info->rejected |= CATEGORY_MASK_UTF_8;
1137   return 0;
1138
1139  no_more_source:
1140   if (incomplete && coding->mode & CODING_MODE_LAST_BLOCK)
1141     {
1142       detect_info->rejected |= CATEGORY_MASK_UTF_8;
1143       return 0;
1144     }
1145   detect_info->found |= found;
1146   return 1;
1147 }
1148
1149
1150 static void
1151 decode_coding_utf_8 (coding)
1152      struct coding_system *coding;
1153 {
1154   unsigned char *src = coding->source + coding->consumed;
1155   unsigned char *src_end = coding->source + coding->src_bytes;
1156   unsigned char *src_base;
1157   int *charbuf = coding->charbuf;
1158   int *charbuf_end = charbuf + coding->charbuf_size;
1159   int consumed_chars = 0, consumed_chars_base;
1160   int multibytep = coding->src_multibyte;
1161   Lisp_Object attr, eol_type, charset_list;
1162
1163   CODING_GET_INFO (coding, attr, eol_type, charset_list);
1164
1165   while (1)
1166     {
1167       int c, c1, c2, c3, c4, c5;
1168
1169       src_base = src;
1170       consumed_chars_base = consumed_chars;
1171
1172       if (charbuf >= charbuf_end)
1173         break;
1174
1175       ONE_MORE_BYTE (c1);
1176       if (UTF_8_1_OCTET_P(c1))
1177         {
1178           c = c1;
1179           if (c == '\r')
1180             {
1181               if (EQ (eol_type, Qdos))
1182                 {
1183                   if (src == src_end)
1184                     {
1185                       coding->result = CODING_RESULT_INSUFFICIENT_SRC;
1186                       goto no_more_source;
1187                     }
1188                   if (*src == '\n')
1189                     ONE_MORE_BYTE (c);
1190                 }
1191               else if (EQ (eol_type, Qmac))
1192                 c = '\n';
1193             }
1194         }
1195       else
1196         {
1197           ONE_MORE_BYTE (c2);
1198           if (! UTF_8_EXTRA_OCTET_P (c2))
1199             goto invalid_code;
1200           if (UTF_8_2_OCTET_LEADING_P (c1))
1201             {
1202               c = ((c1 & 0x1F) << 6) | (c2 & 0x3F);
1203               /* Reject overlong sequences here and below.  Encoders
1204                  producing them are incorrect, they can be misleading,
1205                  and they mess up read/write invariance.  */
1206               if (c < 128)
1207                 goto invalid_code;
1208             }
1209           else
1210             {
1211               ONE_MORE_BYTE (c3);
1212               if (! UTF_8_EXTRA_OCTET_P (c3))
1213                 goto invalid_code;
1214               if (UTF_8_3_OCTET_LEADING_P (c1))
1215                 {
1216                   c = (((c1 & 0xF) << 12)
1217                        | ((c2 & 0x3F) << 6) | (c3 & 0x3F));
1218                   if (c < 0x800
1219                       || (c >= 0xd800 && c < 0xe000)) /* surrogates (invalid) */
1220                     goto invalid_code;
1221                 }
1222               else
1223                 {
1224                   ONE_MORE_BYTE (c4);
1225                   if (! UTF_8_EXTRA_OCTET_P (c4))
1226                     goto invalid_code;
1227                   if (UTF_8_4_OCTET_LEADING_P (c1))
1228                     {
1229                     c = (((c1 & 0x7) << 18) | ((c2 & 0x3F) << 12)
1230                          | ((c3 & 0x3F) << 6) | (c4 & 0x3F));
1231                     if (c < 0x10000)
1232                       goto invalid_code;
1233                     }
1234                   else
1235                     {
1236                       ONE_MORE_BYTE (c5);
1237                       if (! UTF_8_EXTRA_OCTET_P (c5))
1238                         goto invalid_code;
1239                       if (UTF_8_5_OCTET_LEADING_P (c1))
1240                         {
1241                           c = (((c1 & 0x3) << 24) | ((c2 & 0x3F) << 18)
1242                                | ((c3 & 0x3F) << 12) | ((c4 & 0x3F) << 6)
1243                                | (c5 & 0x3F));
1244                           if ((c > MAX_CHAR) || (c < 0x200000))
1245                             goto invalid_code;
1246                         }
1247                       else
1248                         goto invalid_code;
1249                     }
1250                 }
1251             }
1252         }
1253
1254       *charbuf++ = c;
1255       continue;
1256
1257     invalid_code:
1258       src = src_base;
1259       consumed_chars = consumed_chars_base;
1260       ONE_MORE_BYTE (c);
1261       *charbuf++ = ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
1262       coding->errors++;
1263     }
1264
1265  no_more_source:
1266   coding->consumed_char += consumed_chars_base;
1267   coding->consumed = src_base - coding->source;
1268   coding->charbuf_used = charbuf - coding->charbuf;
1269 }
1270
1271
1272 static int
1273 encode_coding_utf_8 (coding)
1274      struct coding_system *coding;
1275 {
1276   int multibytep = coding->dst_multibyte;
1277   int *charbuf = coding->charbuf;
1278   int *charbuf_end = charbuf + coding->charbuf_used;
1279   unsigned char *dst = coding->destination + coding->produced;
1280   unsigned char *dst_end = coding->destination + coding->dst_bytes;
1281   int produced_chars = 0;
1282   int c;
1283
1284   if (multibytep)
1285     {
1286       int safe_room = MAX_MULTIBYTE_LENGTH * 2;
1287
1288       while (charbuf < charbuf_end)
1289         {
1290           unsigned char str[MAX_MULTIBYTE_LENGTH], *p, *pend = str;
1291
1292           ASSURE_DESTINATION (safe_room);
1293           c = *charbuf++;
1294           if (CHAR_BYTE8_P (c))
1295             {
1296               c = CHAR_TO_BYTE8 (c);
1297               EMIT_ONE_BYTE (c);
1298             }
1299           else
1300             {
1301               CHAR_STRING_ADVANCE (c, pend);
1302               for (p = str; p < pend; p++)
1303                 EMIT_ONE_BYTE (*p);
1304             }
1305         }
1306     }
1307   else
1308     {
1309       int safe_room = MAX_MULTIBYTE_LENGTH;
1310
1311       while (charbuf < charbuf_end)
1312         {
1313           ASSURE_DESTINATION (safe_room);
1314           c = *charbuf++;
1315           dst += CHAR_STRING (c, dst);
1316           produced_chars++;
1317         }
1318     }
1319   coding->result = CODING_RESULT_SUCCESS;
1320   coding->produced_char += produced_chars;
1321   coding->produced = dst - coding->destination;
1322   return 0;
1323 }
1324
1325
1326 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
1327    Check if a text is encoded in one of UTF-16 based coding systems.
1328    If it is, return 1, else return 0.  */
1329
1330 #define UTF_16_HIGH_SURROGATE_P(val) \
1331   (((val) & 0xFC00) == 0xD800)
1332
1333 #define UTF_16_LOW_SURROGATE_P(val) \
1334   (((val) & 0xFC00) == 0xDC00)
1335
1336 #define UTF_16_INVALID_P(val)   \
1337   (((val) == 0xFFFE)            \
1338    || ((val) == 0xFFFF)         \
1339    || UTF_16_LOW_SURROGATE_P (val))
1340
1341
1342 static int
1343 detect_coding_utf_16 (coding, detect_info)
1344      struct coding_system *coding;
1345      struct coding_detection_info *detect_info;
1346 {
1347   unsigned char *src = coding->source, *src_base = src;
1348   unsigned char *src_end = coding->source + coding->src_bytes;
1349   int multibytep = coding->src_multibyte;
1350   int consumed_chars = 0;
1351   int c1, c2;
1352
1353   detect_info->checked |= CATEGORY_MASK_UTF_16;
1354
1355   if (coding->mode & CODING_MODE_LAST_BLOCK
1356       && (coding->src_bytes & 1))
1357     {
1358       detect_info->rejected |= CATEGORY_MASK_UTF_16;
1359       return 0;
1360     }
1361   ONE_MORE_BYTE (c1);
1362   ONE_MORE_BYTE (c2);
1363
1364   if ((c1 == 0xFF) && (c2 == 0xFE))
1365     {
1366       detect_info->found |= CATEGORY_MASK_UTF_16_LE;
1367       detect_info->rejected |= CATEGORY_MASK_UTF_16_BE;
1368     }
1369   else if ((c1 == 0xFE) && (c2 == 0xFF))
1370     {
1371       detect_info->found |= CATEGORY_MASK_UTF_16_BE;
1372       detect_info->rejected |= CATEGORY_MASK_UTF_16_LE;
1373     }
1374  no_more_source:
1375   return 1;
1376 }
1377
1378 static void
1379 decode_coding_utf_16 (coding)
1380      struct coding_system *coding;
1381 {
1382   unsigned char *src = coding->source + coding->consumed;
1383   unsigned char *src_end = coding->source + coding->src_bytes;
1384   unsigned char *src_base;
1385   int *charbuf = coding->charbuf;
1386   int *charbuf_end = charbuf + coding->charbuf_size;
1387   int consumed_chars = 0, consumed_chars_base;
1388   int multibytep = coding->src_multibyte;
1389   enum utf_16_bom_type bom = CODING_UTF_16_BOM (coding);
1390   enum utf_16_endian_type endian = CODING_UTF_16_ENDIAN (coding);
1391   int surrogate = CODING_UTF_16_SURROGATE (coding);
1392   Lisp_Object attr, eol_type, charset_list;
1393
1394   CODING_GET_INFO (coding, attr, eol_type, charset_list);
1395
1396   if (bom != utf_16_without_bom)
1397     {
1398       int c, c1, c2;
1399
1400       src_base = src;
1401       ONE_MORE_BYTE (c1);
1402       ONE_MORE_BYTE (c2);
1403       c = (c1 << 8) | c2;
1404       if (bom == utf_16_with_bom)
1405         {
1406           if (endian == utf_16_big_endian
1407               ? c != 0xFEFF : c != 0xFFFE)
1408             {
1409               /* We are sure that there's enouph room at CHARBUF.  */
1410               *charbuf++ = c1;
1411               *charbuf++ = c2;
1412               coding->errors++;
1413             }
1414         }
1415       else
1416         {
1417           if (c == 0xFEFF)
1418             CODING_UTF_16_ENDIAN (coding)
1419               = endian = utf_16_big_endian;
1420           else if (c == 0xFFFE)
1421             CODING_UTF_16_ENDIAN (coding)
1422               = endian = utf_16_little_endian;
1423           else
1424             {
1425               CODING_UTF_16_ENDIAN (coding)
1426                 = endian = utf_16_big_endian;
1427               src = src_base;
1428             }
1429         }
1430       CODING_UTF_16_BOM (coding) = utf_16_with_bom;
1431     }
1432
1433   while (1)
1434     {
1435       int c, c1, c2;
1436
1437       src_base = src;
1438       consumed_chars_base = consumed_chars;
1439
1440       if (charbuf + 2 >= charbuf_end)
1441         break;
1442
1443       ONE_MORE_BYTE (c1);
1444       ONE_MORE_BYTE (c2);
1445       c = (endian == utf_16_big_endian
1446            ? ((c1 << 8) | c2) : ((c2 << 8) | c1));
1447       if (surrogate)
1448         {
1449           if (! UTF_16_LOW_SURROGATE_P (c))
1450             {
1451               if (endian == utf_16_big_endian)
1452                 c1 = surrogate >> 8, c2 = surrogate & 0xFF;
1453               else
1454                 c1 = surrogate & 0xFF, c2 = surrogate >> 8;
1455               *charbuf++ = c1;
1456               *charbuf++ = c2;
1457               coding->errors++;
1458               if (UTF_16_HIGH_SURROGATE_P (c))
1459                 CODING_UTF_16_SURROGATE (coding) = surrogate = c;
1460               else
1461                 *charbuf++ = c;
1462             }
1463           else
1464             {
1465               c = ((surrogate - 0xD800) << 10) | (c - 0xDC00);
1466               CODING_UTF_16_SURROGATE (coding) = surrogate = 0;
1467               *charbuf++ = c;
1468             }
1469         }
1470       else
1471         {
1472           if (UTF_16_HIGH_SURROGATE_P (c))
1473             CODING_UTF_16_SURROGATE (coding) = surrogate = c;
1474           else
1475             *charbuf++ = c;
1476         }
1477     }
1478
1479  no_more_source:
1480   coding->consumed_char += consumed_chars_base;
1481   coding->consumed = src_base - coding->source;
1482   coding->charbuf_used = charbuf - coding->charbuf;
1483 }
1484
1485 static int
1486 encode_coding_utf_16 (coding)
1487      struct coding_system *coding;
1488 {
1489   int multibytep = coding->dst_multibyte;
1490   int *charbuf = coding->charbuf;
1491   int *charbuf_end = charbuf + coding->charbuf_used;
1492   unsigned char *dst = coding->destination + coding->produced;
1493   unsigned char *dst_end = coding->destination + coding->dst_bytes;
1494   int safe_room = 8;
1495   enum utf_16_bom_type bom = CODING_UTF_16_BOM (coding);
1496   int big_endian = CODING_UTF_16_ENDIAN (coding) == utf_16_big_endian;
1497   int produced_chars = 0;
1498   Lisp_Object attrs, eol_type, charset_list;
1499   int c;
1500
1501   CODING_GET_INFO (coding, attrs, eol_type, charset_list);
1502
1503   if (bom == utf_16_with_bom)
1504     {
1505       ASSURE_DESTINATION (safe_room);
1506       if (big_endian)
1507         EMIT_TWO_BYTES (0xFE, 0xFF);
1508       else
1509         EMIT_TWO_BYTES (0xFF, 0xFE);
1510       CODING_UTF_16_BOM (coding) = utf_16_without_bom;
1511     }
1512
1513   while (charbuf < charbuf_end)
1514     {
1515       ASSURE_DESTINATION (safe_room);
1516       c = *charbuf++;
1517       if (c >= MAX_UNICODE_CHAR)
1518         c = coding->default_char;
1519
1520       if (c < 0x10000)
1521         {
1522           if (big_endian)
1523             EMIT_TWO_BYTES (c >> 8, c & 0xFF);
1524           else
1525             EMIT_TWO_BYTES (c & 0xFF, c >> 8);
1526         }
1527       else
1528         {
1529           int c1, c2;
1530
1531           c -= 0x10000;
1532           c1 = (c >> 10) + 0xD800;
1533           c2 = (c & 0x3FF) + 0xDC00;
1534           if (big_endian)
1535             EMIT_FOUR_BYTES (c1 >> 8, c1 & 0xFF, c2 >> 8, c2 & 0xFF);
1536           else
1537             EMIT_FOUR_BYTES (c1 & 0xFF, c1 >> 8, c2 & 0xFF, c2 >> 8);
1538         }
1539     }
1540   coding->result = CODING_RESULT_SUCCESS;
1541   coding->produced = dst - coding->destination;
1542   coding->produced_char += produced_chars;
1543   return 0;
1544 }
1545
1546 \f
1547 /*** 6. Old Emacs' internal format (emacs-mule) ***/
1548
1549 /* Emacs' internal format for representation of multiple character
1550    sets is a kind of multi-byte encoding, i.e. characters are
1551    represented by variable-length sequences of one-byte codes.
1552
1553    ASCII characters and control characters (e.g. `tab', `newline') are
1554    represented by one-byte sequences which are their ASCII codes, in
1555    the range 0x00 through 0x7F.
1556
1557    8-bit characters of the range 0x80..0x9F are represented by
1558    two-byte sequences of LEADING_CODE_8_BIT_CONTROL and (their 8-bit
1559    code + 0x20).
1560
1561    8-bit characters of the range 0xA0..0xFF are represented by
1562    one-byte sequences which are their 8-bit code.
1563
1564    The other characters are represented by a sequence of `base
1565    leading-code', optional `extended leading-code', and one or two
1566    `position-code's.  The length of the sequence is determined by the
1567    base leading-code.  Leading-code takes the range 0x81 through 0x9D,
1568    whereas extended leading-code and position-code take the range 0xA0
1569    through 0xFF.  See `charset.h' for more details about leading-code
1570    and position-code.
1571
1572    --- CODE RANGE of Emacs' internal format ---
1573    character set        range
1574    -------------        -----
1575    ascii                0x00..0x7F
1576    eight-bit-control    LEADING_CODE_8_BIT_CONTROL + 0xA0..0xBF
1577    eight-bit-graphic    0xA0..0xBF
1578    ELSE                 0x81..0x9D + [0xA0..0xFF]+
1579    ---------------------------------------------
1580
1581    As this is the internal character representation, the format is
1582    usually not used externally (i.e. in a file or in a data sent to a
1583    process).  But, it is possible to have a text externally in this
1584    format (i.e. by encoding by the coding system `emacs-mule').
1585
1586    In that case, a sequence of one-byte codes has a slightly different
1587    form.
1588
1589    At first, all characters in eight-bit-control are represented by
1590    one-byte sequences which are their 8-bit code.
1591
1592    Next, character composition data are represented by the byte
1593    sequence of the form: 0x80 METHOD BYTES CHARS COMPONENT ...,
1594    where,
1595         METHOD is 0xF0 plus one of composition method (enum
1596         composition_method),
1597
1598         BYTES is 0xA0 plus a byte length of this composition data,
1599
1600         CHARS is 0x20 plus a number of characters composed by this
1601         data,
1602
1603         COMPONENTs are characters of multibye form or composition
1604         rules encoded by two-byte of ASCII codes.
1605
1606    In addition, for backward compatibility, the following formats are
1607    also recognized as composition data on decoding.
1608
1609    0x80 MSEQ ...
1610    0x80 0xFF MSEQ RULE MSEQ RULE ... MSEQ
1611
1612    Here,
1613         MSEQ is a multibyte form but in these special format:
1614           ASCII: 0xA0 ASCII_CODE+0x80,
1615           other: LEADING_CODE+0x20 FOLLOWING-BYTE ...,
1616         RULE is a one byte code of the range 0xA0..0xF0 that
1617         represents a composition rule.
1618   */
1619
1620 char emacs_mule_bytes[256];
1621
1622 int
1623 emacs_mule_char (coding, src, nbytes, nchars, id)
1624      struct coding_system *coding;
1625      unsigned char *src;
1626      int *nbytes, *nchars, *id;
1627 {
1628   unsigned char *src_end = coding->source + coding->src_bytes;
1629   int multibytep = coding->src_multibyte;
1630   unsigned char *src_base = src;
1631   struct charset *charset;
1632   unsigned code;
1633   int c;
1634   int consumed_chars = 0;
1635
1636   ONE_MORE_BYTE (c);
1637   switch (emacs_mule_bytes[c])
1638     {
1639     case 2:
1640       if (! (charset = emacs_mule_charset[c]))
1641         goto invalid_code;
1642       ONE_MORE_BYTE (c);
1643       code = c & 0x7F;
1644       break;
1645
1646     case 3:
1647       if (c == EMACS_MULE_LEADING_CODE_PRIVATE_11
1648           || c == EMACS_MULE_LEADING_CODE_PRIVATE_12)
1649         {
1650           ONE_MORE_BYTE (c);
1651           if (! (charset = emacs_mule_charset[c]))
1652             goto invalid_code;
1653           ONE_MORE_BYTE (c);
1654           code = c & 0x7F;
1655         }
1656       else
1657         {
1658           if (! (charset = emacs_mule_charset[c]))
1659             goto invalid_code;
1660           ONE_MORE_BYTE (c);
1661           code = (c & 0x7F) << 8;
1662           ONE_MORE_BYTE (c);
1663           code |= c & 0x7F;
1664         }
1665       break;
1666
1667     case 4:
1668       ONE_MORE_BYTE (c);
1669       if (! (charset = emacs_mule_charset[c]))
1670         goto invalid_code;
1671       ONE_MORE_BYTE (c);
1672       code = (c & 0x7F) << 8;
1673       ONE_MORE_BYTE (c);
1674       code |= c & 0x7F;
1675       break;
1676
1677     case 1:
1678       code = c;
1679       charset = CHARSET_FROM_ID (ASCII_BYTE_P (code)
1680                                  ? charset_ascii : charset_eight_bit);
1681       break;
1682
1683     default:
1684       abort ();
1685     }
1686   c = DECODE_CHAR (charset, code);
1687   if (c < 0)
1688     goto invalid_code;
1689   *nbytes = src - src_base;
1690   *nchars = consumed_chars;
1691   if (id)
1692     *id = charset->id;
1693   return c;
1694
1695  no_more_source:
1696   return -2;
1697
1698  invalid_code:
1699   return -1;
1700 }
1701
1702
1703 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
1704    Check if a text is encoded in `emacs-mule'.  If it is, return 1,
1705    else return 0.  */
1706
1707 static int
1708 detect_coding_emacs_mule (coding, detect_info)
1709      struct coding_system *coding;
1710      struct coding_detection_info *detect_info;
1711 {
1712   unsigned char *src = coding->source, *src_base = src;
1713   unsigned char *src_end = coding->source + coding->src_bytes;
1714   int multibytep = coding->src_multibyte;
1715   int consumed_chars = 0;
1716   int c;
1717   int found = 0;
1718   int incomplete;
1719
1720   detect_info->checked |= CATEGORY_MASK_EMACS_MULE;
1721   /* A coding system of this category is always ASCII compatible.  */
1722   src += coding->head_ascii;
1723
1724   while (1)
1725     {
1726       incomplete = 0;
1727       ONE_MORE_BYTE (c);
1728       incomplete = 1;
1729
1730       if (c == 0x80)
1731         {
1732           /* Perhaps the start of composite character.  We simple skip
1733              it because analyzing it is too heavy for detecting.  But,
1734              at least, we check that the composite character
1735              constitues of more than 4 bytes.  */
1736           unsigned char *src_base;
1737
1738         repeat:
1739           src_base = src;
1740           do
1741             {
1742               ONE_MORE_BYTE (c);
1743             }
1744           while (c >= 0xA0);
1745
1746           if (src - src_base <= 4)
1747             break;
1748           found = CATEGORY_MASK_EMACS_MULE;
1749           if (c == 0x80)
1750             goto repeat;
1751         }
1752
1753       if (c < 0x80)
1754         {
1755           if (c < 0x20
1756               && (c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO))
1757             break;
1758         }
1759       else
1760         {
1761           unsigned char *src_base = src - 1;
1762
1763           do
1764             {
1765               ONE_MORE_BYTE (c);
1766             }
1767           while (c >= 0xA0);
1768           if (src - src_base != emacs_mule_bytes[*src_base])
1769             break;
1770           found = CATEGORY_MASK_EMACS_MULE;
1771         }
1772     }
1773   detect_info->rejected |= CATEGORY_MASK_EMACS_MULE;
1774   return 0;
1775
1776  no_more_source:
1777   if (incomplete && coding->mode & CODING_MODE_LAST_BLOCK)
1778     {
1779       detect_info->rejected |= CATEGORY_MASK_EMACS_MULE;
1780       return 0;
1781     }
1782   detect_info->found |= found;
1783   return 1;
1784 }
1785
1786
1787 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".  */
1788
1789 /* Decode a character represented as a component of composition
1790    sequence of Emacs 20/21 style at SRC.  Set C to that character and
1791    update SRC to the head of next character (or an encoded composition
1792    rule).  If SRC doesn't points a composition component, set C to -1.
1793    If SRC points an invalid byte sequence, global exit by a return
1794    value 0.  */
1795
1796 #define DECODE_EMACS_MULE_COMPOSITION_CHAR(buf)                 \
1797   if (1)                                                        \
1798     {                                                           \
1799       int c;                                                    \
1800       int nbytes, nchars;                                       \
1801                                                                 \
1802       if (src == src_end)                                       \
1803         break;                                                  \
1804       c = emacs_mule_char (coding, src, &nbytes, &nchars, NULL);\
1805       if (c < 0)                                                \
1806         {                                                       \
1807           if (c == -2)                                          \
1808             break;                                              \
1809           goto invalid_code;                                    \
1810         }                                                       \
1811       *buf++ = c;                                               \
1812       src += nbytes;                                            \
1813       consumed_chars += nchars;                                 \
1814     }                                                           \
1815   else
1816
1817
1818 /* Decode a composition rule represented as a component of composition
1819    sequence of Emacs 20 style at SRC.  Store the decoded rule in *BUF,
1820    and increment BUF.  If SRC points an invalid byte sequence, set C
1821    to -1.  */
1822
1823 #define DECODE_EMACS_MULE_COMPOSITION_RULE_20(buf)      \
1824   do {                                                  \
1825     int c, gref, nref;                                  \
1826                                                         \
1827     if (src >= src_end)                                 \
1828       goto invalid_code;                                \
1829     ONE_MORE_BYTE_NO_CHECK (c);                         \
1830     c -= 0x20;                                          \
1831     if (c < 0 || c >= 81)                               \
1832       goto invalid_code;                                \
1833                                                         \
1834     gref = c / 9, nref = c % 9;                         \
1835     *buf++ = COMPOSITION_ENCODE_RULE (gref, nref);      \
1836   } while (0)
1837
1838
1839 /* Decode a composition rule represented as a component of composition
1840    sequence of Emacs 21 style at SRC.  Store the decoded rule in *BUF,
1841    and increment BUF.  If SRC points an invalid byte sequence, set C
1842    to -1.  */
1843
1844 #define DECODE_EMACS_MULE_COMPOSITION_RULE_21(buf)      \
1845   do {                                                  \
1846     int gref, nref;                                     \
1847                                                         \
1848     if (src + 1>= src_end)                              \
1849       goto invalid_code;                                \
1850     ONE_MORE_BYTE_NO_CHECK (gref);                      \
1851     gref -= 0x20;                                       \
1852     ONE_MORE_BYTE_NO_CHECK (nref);                      \
1853     nref -= 0x20;                                       \
1854     if (gref < 0 || gref >= 81                          \
1855         || nref < 0 || nref >= 81)                      \
1856       goto invalid_code;                                \
1857     *buf++ = COMPOSITION_ENCODE_RULE (gref, nref);      \
1858   } while (0)
1859
1860
1861 #define DECODE_EMACS_MULE_21_COMPOSITION(c)                             \
1862   do {                                                                  \
1863     /* Emacs 21 style format.  The first three bytes at SRC are         \
1864        (METHOD - 0xF2), (BYTES - 0xA0), (CHARS - 0xA0), where BYTES is  \
1865        the byte length of this composition information, CHARS is the    \
1866        number of characters composed by this composition.  */           \
1867     enum composition_method method = c - 0xF2;                          \
1868     int *charbuf_base = charbuf;                                        \
1869     int from, to;                                                       \
1870     int consumed_chars_limit;                                           \
1871     int nbytes, nchars;                                                 \
1872                                                                         \
1873     ONE_MORE_BYTE (c);                                                  \
1874     nbytes = c - 0xA0;                                                  \
1875     if (nbytes < 3)                                                     \
1876       goto invalid_code;                                                \
1877     ONE_MORE_BYTE (c);                                                  \
1878     nchars = c - 0xA0;                                                  \
1879     from = coding->produced + char_offset;                              \
1880     to = from + nchars;                                                 \
1881     ADD_COMPOSITION_DATA (charbuf, from, to, method);                   \
1882     consumed_chars_limit = consumed_chars_base + nbytes;                \
1883     if (method != COMPOSITION_RELATIVE)                                 \
1884       {                                                                 \
1885         int i = 0;                                                      \
1886         while (consumed_chars < consumed_chars_limit)                   \
1887           {                                                             \
1888             if (i % 2 && method != COMPOSITION_WITH_ALTCHARS)           \
1889               DECODE_EMACS_MULE_COMPOSITION_RULE_21 (charbuf);          \
1890             else                                                        \
1891               DECODE_EMACS_MULE_COMPOSITION_CHAR (charbuf);             \
1892             i++;                                                        \
1893           }                                                             \
1894         if (consumed_chars < consumed_chars_limit)                      \
1895           goto invalid_code;                                            \
1896         charbuf_base[0] -= i;                                           \
1897       }                                                                 \
1898   } while (0)
1899
1900
1901 #define DECODE_EMACS_MULE_20_RELATIVE_COMPOSITION(c)            \
1902   do {                                                          \
1903     /* Emacs 20 style format for relative composition.  */      \
1904     /* Store multibyte form of characters to be composed.  */   \
1905     enum composition_method method = COMPOSITION_RELATIVE;      \
1906     int components[MAX_COMPOSITION_COMPONENTS * 2 - 1];         \
1907     int *buf = components;                                      \
1908     int i, j;                                                   \
1909     int from, to;                                               \
1910                                                                 \
1911     src = src_base;                                             \
1912     ONE_MORE_BYTE (c);          /* skip 0x80 */                 \
1913     for (i = 0; i < MAX_COMPOSITION_COMPONENTS; i++)            \
1914       DECODE_EMACS_MULE_COMPOSITION_CHAR (buf);                 \
1915     if (i < 2)                                                  \
1916       goto invalid_code;                                        \
1917     from = coding->produced_char + char_offset;                 \
1918     to = from + i;                                              \
1919     ADD_COMPOSITION_DATA (charbuf, from, to, method);           \
1920     for (j = 0; j < i; j++)                                     \
1921       *charbuf++ = components[j];                               \
1922   } while (0)
1923
1924
1925 #define DECODE_EMACS_MULE_20_RULEBASE_COMPOSITION(c)            \
1926   do {                                                          \
1927     /* Emacs 20 style format for rule-base composition.  */     \
1928     /* Store multibyte form of characters to be composed.  */   \
1929     enum composition_method method = COMPOSITION_WITH_RULE;     \
1930     int components[MAX_COMPOSITION_COMPONENTS * 2 - 1];         \
1931     int *buf = components;                                      \
1932     int i, j;                                                   \
1933     int from, to;                                               \
1934                                                                 \
1935     DECODE_EMACS_MULE_COMPOSITION_CHAR (buf);                   \
1936     for (i = 0; i < MAX_COMPOSITION_COMPONENTS; i++)            \
1937       {                                                         \
1938         DECODE_EMACS_MULE_COMPOSITION_RULE_20 (buf);            \
1939         DECODE_EMACS_MULE_COMPOSITION_CHAR (buf);               \
1940       }                                                         \
1941     if (i < 1 || (buf - components) % 2 == 0)                   \
1942       goto invalid_code;                                        \
1943     if (charbuf + i + (i / 2) + 1 < charbuf_end)                \
1944       goto no_more_source;                                      \
1945     from = coding->produced_char + char_offset;                 \
1946     to = from + i;                                              \
1947     ADD_COMPOSITION_DATA (buf, from, to, method);               \
1948     for (j = 0; j < i; j++)                                     \
1949       *charbuf++ = components[j];                               \
1950     for (j = 0; j < i; j += 2)                                  \
1951       *charbuf++ = components[j];                               \
1952   } while (0)
1953
1954
1955 static void
1956 decode_coding_emacs_mule (coding)
1957      struct coding_system *coding;
1958 {
1959   unsigned char *src = coding->source + coding->consumed;
1960   unsigned char *src_end = coding->source + coding->src_bytes;
1961   unsigned char *src_base;
1962   int *charbuf = coding->charbuf;
1963   int *charbuf_end = charbuf + coding->charbuf_size - MAX_ANNOTATION_LENGTH;
1964   int consumed_chars = 0, consumed_chars_base;
1965   int multibytep = coding->src_multibyte;
1966   Lisp_Object attrs, eol_type, charset_list;
1967   int char_offset = coding->produced_char;
1968   int last_offset = char_offset;
1969   int last_id = charset_ascii;
1970
1971   CODING_GET_INFO (coding, attrs, eol_type, charset_list);
1972
1973   while (1)
1974     {
1975       int c;
1976
1977       src_base = src;
1978       consumed_chars_base = consumed_chars;
1979
1980       if (charbuf >= charbuf_end)
1981         break;
1982
1983       ONE_MORE_BYTE (c);
1984
1985       if (c < 0x80)
1986         {
1987           if (c == '\r')
1988             {
1989               if (EQ (eol_type, Qdos))
1990                 {
1991                   if (src == src_end)
1992                     {
1993                       coding->result = CODING_RESULT_INSUFFICIENT_SRC;
1994                       goto no_more_source;
1995                     }
1996                   if (*src == '\n')
1997                     ONE_MORE_BYTE (c);
1998                 }
1999               else if (EQ (eol_type, Qmac))
2000                 c = '\n';
2001             }
2002           *charbuf++ = c;
2003           char_offset++;
2004         }
2005       else if (c == 0x80)
2006         {
2007           ONE_MORE_BYTE (c);
2008           if (c - 0xF2 >= COMPOSITION_RELATIVE
2009               && c - 0xF2 <= COMPOSITION_WITH_RULE_ALTCHARS)
2010             DECODE_EMACS_MULE_21_COMPOSITION (c);
2011           else if (c < 0xC0)
2012             DECODE_EMACS_MULE_20_RELATIVE_COMPOSITION (c);
2013           else if (c == 0xFF)
2014             DECODE_EMACS_MULE_20_RULEBASE_COMPOSITION (c);
2015           else
2016             goto invalid_code;
2017         }
2018       else if (c < 0xA0 && emacs_mule_bytes[c] > 1)
2019         {
2020           int nbytes, nchars;
2021           int id;
2022
2023           src = src_base;
2024           consumed_chars = consumed_chars_base;
2025           c = emacs_mule_char (coding, src, &nbytes, &nchars, &id);
2026           if (c < 0)
2027             {
2028               if (c == -2)
2029                 break;
2030               goto invalid_code;
2031             }
2032           if (last_id != id)
2033             {
2034               if (last_id != charset_ascii)
2035                 ADD_CHARSET_DATA (charbuf, last_offset, char_offset, last_id);
2036               last_id = id;
2037               last_offset = char_offset;
2038             }
2039           *charbuf++ = c;
2040           src += nbytes;
2041           consumed_chars += nchars;
2042           char_offset++;
2043         }
2044       continue;
2045
2046     invalid_code:
2047       src = src_base;
2048       consumed_chars = consumed_chars_base;
2049       ONE_MORE_BYTE (c);
2050       *charbuf++ = ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
2051       char_offset++;
2052       coding->errors++;
2053     }
2054
2055  no_more_source:
2056   if (last_id != charset_ascii)
2057     ADD_CHARSET_DATA (charbuf, last_offset, char_offset, last_id);
2058   coding->consumed_char += consumed_chars_base;
2059   coding->consumed = src_base - coding->source;
2060   coding->charbuf_used = charbuf - coding->charbuf;
2061 }
2062
2063
2064 #define EMACS_MULE_LEADING_CODES(id, codes)     \
2065   do {                                          \
2066     if (id < 0xA0)                              \
2067       codes[0] = id, codes[1] = 0;              \
2068     else if (id < 0xE0)                         \
2069       codes[0] = 0x9A, codes[1] = id;           \
2070     else if (id < 0xF0)                         \
2071       codes[0] = 0x9B, codes[1] = id;           \
2072     else if (id < 0xF5)                         \
2073       codes[0] = 0x9C, codes[1] = id;           \
2074     else                                        \
2075       codes[0] = 0x9D, codes[1] = id;           \
2076   } while (0);
2077
2078
2079 static int
2080 encode_coding_emacs_mule (coding)
2081      struct coding_system *coding;
2082 {
2083   int multibytep = coding->dst_multibyte;
2084   int *charbuf = coding->charbuf;
2085   int *charbuf_end = charbuf + coding->charbuf_used;
2086   unsigned char *dst = coding->destination + coding->produced;
2087   unsigned char *dst_end = coding->destination + coding->dst_bytes;
2088   int safe_room = 8;
2089   int produced_chars = 0;
2090   Lisp_Object attrs, eol_type, charset_list;
2091   int c;
2092   int preferred_charset_id = -1;
2093
2094   CODING_GET_INFO (coding, attrs, eol_type, charset_list);
2095
2096   while (charbuf < charbuf_end)
2097     {
2098       ASSURE_DESTINATION (safe_room);
2099       c = *charbuf++;
2100
2101       if (c < 0)
2102         {
2103           /* Handle an annotation.  */
2104           switch (*charbuf)
2105             {
2106             case CODING_ANNOTATE_COMPOSITION_MASK:
2107               /* Not yet implemented.  */
2108               break;
2109             case CODING_ANNOTATE_CHARSET_MASK:
2110               preferred_charset_id = charbuf[3];
2111               if (preferred_charset_id >= 0
2112                   && NILP (Fmemq (make_number (preferred_charset_id),
2113                                   charset_list)))
2114                 preferred_charset_id = -1;
2115               break;
2116             default:
2117               abort ();
2118             }
2119           charbuf += -c - 1;
2120           continue;
2121         }
2122
2123       if (ASCII_CHAR_P (c))
2124         EMIT_ONE_ASCII_BYTE (c);
2125       else if (CHAR_BYTE8_P (c))
2126         {
2127           c = CHAR_TO_BYTE8 (c);
2128           EMIT_ONE_BYTE (c);
2129         }
2130       else
2131         {
2132           struct charset *charset;
2133           unsigned code;
2134           int dimension;
2135           int emacs_mule_id;
2136           unsigned char leading_codes[2];
2137
2138           if (preferred_charset_id >= 0)
2139             {
2140               charset = CHARSET_FROM_ID (preferred_charset_id);
2141               if (! CHAR_CHARSET_P (c, charset))
2142                 charset = char_charset (c, charset_list, NULL);
2143             }
2144           else
2145             charset = char_charset (c, charset_list, &code);
2146           if (! charset)
2147             {
2148               c = coding->default_char;
2149               if (ASCII_CHAR_P (c))
2150                 {
2151                   EMIT_ONE_ASCII_BYTE (c);
2152                   continue;
2153                 }
2154               charset = char_charset (c, charset_list, &code);
2155             }
2156           dimension = CHARSET_DIMENSION (charset);
2157           emacs_mule_id = CHARSET_EMACS_MULE_ID (charset);
2158           EMACS_MULE_LEADING_CODES (emacs_mule_id, leading_codes);
2159           EMIT_ONE_BYTE (leading_codes[0]);
2160           if (leading_codes[1])
2161             EMIT_ONE_BYTE (leading_codes[1]);
2162           if (dimension == 1)
2163             EMIT_ONE_BYTE (code);
2164           else
2165             {
2166               EMIT_ONE_BYTE (code >> 8);
2167               EMIT_ONE_BYTE (code & 0xFF);
2168             }
2169         }
2170     }
2171   coding->result = CODING_RESULT_SUCCESS;
2172   coding->produced_char += produced_chars;
2173   coding->produced = dst - coding->destination;
2174   return 0;
2175 }
2176
2177 \f
2178 /*** 7. ISO2022 handlers ***/
2179
2180 /* The following note describes the coding system ISO2022 briefly.
2181    Since the intention of this note is to help understand the
2182    functions in this file, some parts are NOT ACCURATE or are OVERLY
2183    SIMPLIFIED.  For thorough understanding, please refer to the
2184    original document of ISO2022.  This is equivalent to the standard
2185    ECMA-35, obtainable from <URL:http://www.ecma.ch/> (*).
2186
2187    ISO2022 provides many mechanisms to encode several character sets
2188    in 7-bit and 8-bit environments.  For 7-bit environments, all text
2189    is encoded using bytes less than 128.  This may make the encoded
2190    text a little bit longer, but the text passes more easily through
2191    several types of gateway, some of which strip off the MSB (Most
2192    Significant Bit).
2193
2194    There are two kinds of character sets: control character sets and
2195    graphic character sets.  The former contain control characters such
2196    as `newline' and `escape' to provide control functions (control
2197    functions are also provided by escape sequences).  The latter
2198    contain graphic characters such as 'A' and '-'.  Emacs recognizes
2199    two control character sets and many graphic character sets.
2200
2201    Graphic character sets are classified into one of the following
2202    four classes, according to the number of bytes (DIMENSION) and
2203    number of characters in one dimension (CHARS) of the set:
2204    - DIMENSION1_CHARS94
2205    - DIMENSION1_CHARS96
2206    - DIMENSION2_CHARS94
2207    - DIMENSION2_CHARS96
2208
2209    In addition, each character set is assigned an identification tag,
2210    unique for each set, called the "final character" (denoted as <F>
2211    hereafter).  The <F> of each character set is decided by ECMA(*)
2212    when it is registered in ISO.  The code range of <F> is 0x30..0x7F
2213    (0x30..0x3F are for private use only).
2214
2215    Note (*): ECMA = European Computer Manufacturers Association
2216
2217    Here are examples of graphic character sets [NAME(<F>)]:
2218         o DIMENSION1_CHARS94 -- ASCII('B'), right-half-of-JISX0201('I'), ...
2219         o DIMENSION1_CHARS96 -- right-half-of-ISO8859-1('A'), ...
2220         o DIMENSION2_CHARS94 -- GB2312('A'), JISX0208('B'), ...
2221         o DIMENSION2_CHARS96 -- none for the moment
2222
2223    A code area (1 byte=8 bits) is divided into 4 areas, C0, GL, C1, and GR.
2224         C0 [0x00..0x1F] -- control character plane 0
2225         GL [0x20..0x7F] -- graphic character plane 0
2226         C1 [0x80..0x9F] -- control character plane 1
2227         GR [0xA0..0xFF] -- graphic character plane 1
2228
2229    A control character set is directly designated and invoked to C0 or
2230    C1 by an escape sequence.  The most common case is that:
2231    - ISO646's  control character set is designated/invoked to C0, and
2232    - ISO6429's control character set is designated/invoked to C1,
2233    and usually these designations/invocations are omitted in encoded
2234    text.  In a 7-bit environment, only C0 can be used, and a control
2235    character for C1 is encoded by an appropriate escape sequence to
2236    fit into the environment.  All control characters for C1 are
2237    defined to have corresponding escape sequences.
2238
2239    A graphic character set is at first designated to one of four
2240    graphic registers (G0 through G3), then these graphic registers are
2241    invoked to GL or GR.  These designations and invocations can be
2242    done independently.  The most common case is that G0 is invoked to
2243    GL, G1 is invoked to GR, and ASCII is designated to G0.  Usually
2244    these invocations and designations are omitted in encoded text.
2245    In a 7-bit environment, only GL can be used.
2246
2247    When a graphic character set of CHARS94 is invoked to GL, codes
2248    0x20 and 0x7F of the GL area work as control characters SPACE and
2249    DEL respectively, and codes 0xA0 and 0xFF of the GR area should not
2250    be used.
2251
2252    There are two ways of invocation: locking-shift and single-shift.
2253    With locking-shift, the invocation lasts until the next different
2254    invocation, whereas with single-shift, the invocation affects the
2255    following character only and doesn't affect the locking-shift
2256    state.  Invocations are done by the following control characters or
2257    escape sequences:
2258
2259    ----------------------------------------------------------------------
2260    abbrev  function                  cntrl escape seq   description
2261    ----------------------------------------------------------------------
2262    SI/LS0  (shift-in)                0x0F  none         invoke G0 into GL
2263    SO/LS1  (shift-out)               0x0E  none         invoke G1 into GL
2264    LS2     (locking-shift-2)         none  ESC 'n'      invoke G2 into GL
2265    LS3     (locking-shift-3)         none  ESC 'o'      invoke G3 into GL
2266    LS1R    (locking-shift-1 right)   none  ESC '~'      invoke G1 into GR (*)
2267    LS2R    (locking-shift-2 right)   none  ESC '}'      invoke G2 into GR (*)
2268    LS3R    (locking-shift 3 right)   none  ESC '|'      invoke G3 into GR (*)
2269    SS2     (single-shift-2)          0x8E  ESC 'N'      invoke G2 for one char
2270    SS3     (single-shift-3)          0x8F  ESC 'O'      invoke G3 for one char
2271    ----------------------------------------------------------------------
2272    (*) These are not used by any known coding system.
2273
2274    Control characters for these functions are defined by macros
2275    ISO_CODE_XXX in `coding.h'.
2276
2277    Designations are done by the following escape sequences:
2278    ----------------------------------------------------------------------
2279    escape sequence      description
2280    ----------------------------------------------------------------------
2281    ESC '(' <F>          designate DIMENSION1_CHARS94<F> to G0
2282    ESC ')' <F>          designate DIMENSION1_CHARS94<F> to G1
2283    ESC '*' <F>          designate DIMENSION1_CHARS94<F> to G2
2284    ESC '+' <F>          designate DIMENSION1_CHARS94<F> to G3
2285    ESC ',' <F>          designate DIMENSION1_CHARS96<F> to G0 (*)
2286    ESC '-' <F>          designate DIMENSION1_CHARS96<F> to G1
2287    ESC '.' <F>          designate DIMENSION1_CHARS96<F> to G2
2288    ESC '/' <F>          designate DIMENSION1_CHARS96<F> to G3
2289    ESC '$' '(' <F>      designate DIMENSION2_CHARS94<F> to G0 (**)
2290    ESC '$' ')' <F>      designate DIMENSION2_CHARS94<F> to G1
2291    ESC '$' '*' <F>      designate DIMENSION2_CHARS94<F> to G2
2292    ESC '$' '+' <F>      designate DIMENSION2_CHARS94<F> to G3
2293    ESC '$' ',' <F>      designate DIMENSION2_CHARS96<F> to G0 (*)
2294    ESC '$' '-' <F>      designate DIMENSION2_CHARS96<F> to G1
2295    ESC '$' '.' <F>      designate DIMENSION2_CHARS96<F> to G2
2296    ESC '$' '/' <F>      designate DIMENSION2_CHARS96<F> to G3
2297    ----------------------------------------------------------------------
2298
2299    In this list, "DIMENSION1_CHARS94<F>" means a graphic character set
2300    of dimension 1, chars 94, and final character <F>, etc...
2301
2302    Note (*): Although these designations are not allowed in ISO2022,
2303    Emacs accepts them on decoding, and produces them on encoding
2304    CHARS96 character sets in a coding system which is characterized as
2305    7-bit environment, non-locking-shift, and non-single-shift.
2306
2307    Note (**): If <F> is '@', 'A', or 'B', the intermediate character
2308    '(' must be omitted.  We refer to this as "short-form" hereafter.
2309
2310    Now you may notice that there are a lot of ways of encoding the
2311    same multilingual text in ISO2022.  Actually, there exist many
2312    coding systems such as Compound Text (used in X11's inter client
2313    communication, ISO-2022-JP (used in Japanese Internet), ISO-2022-KR
2314    (used in Korean Internet), EUC (Extended UNIX Code, used in Asian
2315    localized platforms), and all of these are variants of ISO2022.
2316
2317    In addition to the above, Emacs handles two more kinds of escape
2318    sequences: ISO6429's direction specification and Emacs' private
2319    sequence for specifying character composition.
2320
2321    ISO6429's direction specification takes the following form:
2322         o CSI ']'      -- end of the current direction
2323         o CSI '0' ']'  -- end of the current direction
2324         o CSI '1' ']'  -- start of left-to-right text
2325         o CSI '2' ']'  -- start of right-to-left text
2326    The control character CSI (0x9B: control sequence introducer) is
2327    abbreviated to the escape sequence ESC '[' in a 7-bit environment.
2328
2329    Character composition specification takes the following form:
2330         o ESC '0' -- start relative composition
2331         o ESC '1' -- end composition
2332         o ESC '2' -- start rule-base composition (*)
2333         o ESC '3' -- start relative composition with alternate chars  (**)
2334         o ESC '4' -- start rule-base composition with alternate chars  (**)
2335   Since these are not standard escape sequences of any ISO standard,
2336   the use of them with these meanings is restricted to Emacs only.
2337
2338   (*) This form is used only in Emacs 20.7 and older versions,
2339   but newer versions can safely decode it.
2340   (**) This form is used only in Emacs 21.1 and newer versions,
2341   and older versions can't decode it.
2342
2343   Here's a list of example usages of these composition escape
2344   sequences (categorized by `enum composition_method').
2345
2346   COMPOSITION_RELATIVE:
2347         ESC 0 CHAR [ CHAR ] ESC 1
2348   COMPOSITION_WITH_RULE:
2349         ESC 2 CHAR [ RULE CHAR ] ESC 1
2350   COMPOSITION_WITH_ALTCHARS:
2351         ESC 3 ALTCHAR [ ALTCHAR ] ESC 0 CHAR [ CHAR ] ESC 1
2352   COMPOSITION_WITH_RULE_ALTCHARS:
2353         ESC 4 ALTCHAR [ RULE ALTCHAR ] ESC 0 CHAR [ CHAR ] ESC 1 */
2354
2355 enum iso_code_class_type iso_code_class[256];
2356
2357 #define SAFE_CHARSET_P(coding, id)      \
2358   ((id) <= (coding)->max_charset_id     \
2359    && (coding)->safe_charsets[id] >= 0)
2360
2361
2362 #define SHIFT_OUT_OK(category)  \
2363   (CODING_ISO_INITIAL (&coding_categories[category], 1) >= 0)
2364
2365 static void
2366 setup_iso_safe_charsets (attrs)
2367      Lisp_Object attrs;
2368 {
2369   Lisp_Object charset_list, safe_charsets;
2370   Lisp_Object request;
2371   Lisp_Object reg_usage;
2372   Lisp_Object tail;
2373   int reg94, reg96;
2374   int flags = XINT (AREF (attrs, coding_attr_iso_flags));
2375   int max_charset_id;
2376
2377   charset_list = CODING_ATTR_CHARSET_LIST (attrs);
2378   if ((flags & CODING_ISO_FLAG_FULL_SUPPORT)
2379       && ! EQ (charset_list, Viso_2022_charset_list))
2380     {
2381       CODING_ATTR_CHARSET_LIST (attrs)
2382         = charset_list = Viso_2022_charset_list;
2383       ASET (attrs, coding_attr_safe_charsets, Qnil);
2384     }
2385
2386   if (STRINGP (AREF (attrs, coding_attr_safe_charsets)))
2387     return;
2388
2389   max_charset_id = 0;
2390   for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
2391     {
2392       int id = XINT (XCAR (tail));
2393       if (max_charset_id < id)
2394         max_charset_id = id;
2395     }
2396
2397   safe_charsets = Fmake_string (make_number (max_charset_id + 1),
2398                                 make_number (255));
2399   request = AREF (attrs, coding_attr_iso_request);
2400   reg_usage = AREF (attrs, coding_attr_iso_usage);
2401   reg94 = XINT (XCAR (reg_usage));
2402   reg96 = XINT (XCDR (reg_usage));
2403
2404   for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
2405     {
2406       Lisp_Object id;
2407       Lisp_Object reg;
2408       struct charset *charset;
2409
2410       id = XCAR (tail);
2411       charset = CHARSET_FROM_ID (XINT (id));
2412       reg = Fcdr (Fassq (id, request));
2413       if (! NILP (reg))
2414         XSTRING (safe_charsets)->data[XINT (id)] = XINT (reg);
2415       else if (charset->iso_chars_96)
2416         {
2417           if (reg96 < 4)
2418             XSTRING (safe_charsets)->data[XINT (id)] = reg96;
2419         }
2420       else
2421         {
2422           if (reg94 < 4)
2423             XSTRING (safe_charsets)->data[XINT (id)] = reg94;
2424         }
2425     }
2426   ASET (attrs, coding_attr_safe_charsets, safe_charsets);
2427 }
2428
2429
2430 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
2431    Check if a text is encoded in one of ISO-2022 based codig systems.
2432    If it is, return 1, else return 0.  */
2433
2434 static int
2435 detect_coding_iso_2022 (coding, detect_info)
2436      struct coding_system *coding;
2437      struct coding_detection_info *detect_info;
2438 {
2439   unsigned char *src = coding->source, *src_base = src;
2440   unsigned char *src_end = coding->source + coding->src_bytes;
2441   int multibytep = coding->src_multibyte;
2442   int single_shifting = 0;
2443   int id;
2444   int c, c1;
2445   int consumed_chars = 0;
2446   int i;
2447   int rejected = 0;
2448   int found = 0;
2449
2450   detect_info->checked |= CATEGORY_MASK_ISO;
2451
2452   for (i = coding_category_iso_7; i <= coding_category_iso_8_else; i++)
2453     {
2454       struct coding_system *this = &(coding_categories[i]);
2455       Lisp_Object attrs, val;
2456
2457       attrs = CODING_ID_ATTRS (this->id);
2458       if (CODING_ISO_FLAGS (this) & CODING_ISO_FLAG_FULL_SUPPORT
2459           && ! EQ (CODING_ATTR_SAFE_CHARSETS (attrs), Viso_2022_charset_list))
2460         setup_iso_safe_charsets (attrs);
2461       val = CODING_ATTR_SAFE_CHARSETS (attrs);
2462       this->max_charset_id = XSTRING (val)->size - 1;
2463       this->safe_charsets = (char *) XSTRING (val)->data;
2464     }
2465
2466   /* A coding system of this category is always ASCII compatible.  */
2467   src += coding->head_ascii;
2468
2469   while (rejected != CATEGORY_MASK_ISO)
2470     {
2471       ONE_MORE_BYTE (c);
2472       switch (c)
2473         {
2474         case ISO_CODE_ESC:
2475           if (inhibit_iso_escape_detection)
2476             break;
2477           single_shifting = 0;
2478           ONE_MORE_BYTE (c);
2479           if (c >= '(' && c <= '/')
2480             {
2481               /* Designation sequence for a charset of dimension 1.  */
2482               ONE_MORE_BYTE (c1);
2483               if (c1 < ' ' || c1 >= 0x80
2484                   || (id = iso_charset_table[0][c >= ','][c1]) < 0)
2485                 /* Invalid designation sequence.  Just ignore.  */
2486                 break;
2487             }
2488           else if (c == '$')
2489             {
2490               /* Designation sequence for a charset of dimension 2.  */
2491               ONE_MORE_BYTE (c);
2492               if (c >= '@' && c <= 'B')
2493                 /* Designation for JISX0208.1978, GB2312, or JISX0208.  */
2494                 id = iso_charset_table[1][0][c];
2495               else if (c >= '(' && c <= '/')
2496                 {
2497                   ONE_MORE_BYTE (c1);
2498                   if (c1 < ' ' || c1 >= 0x80
2499                       || (id = iso_charset_table[1][c >= ','][c1]) < 0)
2500                     /* Invalid designation sequence.  Just ignore.  */
2501                     break;
2502                 }
2503               else
2504                 /* Invalid designation sequence.  Just ignore it.  */
2505                 break;
2506             }
2507           else if (c == 'N' || c == 'O')
2508             {
2509               /* ESC <Fe> for SS2 or SS3.  */
2510               single_shifting = 1;
2511               rejected |= CATEGORY_MASK_ISO_7BIT | CATEGORY_MASK_ISO_8BIT;
2512               break;
2513             }
2514           else if (c >= '0' && c <= '4')
2515             {
2516               /* ESC <Fp> for start/end composition.  */
2517               found |= CATEGORY_MASK_ISO;
2518               break;
2519             }
2520           else
2521             {
2522               /* Invalid escape sequence.  Just ignore it.  */
2523               break;
2524             }
2525
2526           /* We found a valid designation sequence for CHARSET.  */
2527           rejected |= CATEGORY_MASK_ISO_8BIT;
2528           if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_7],
2529                               id))
2530             found |= CATEGORY_MASK_ISO_7;
2531           else
2532             rejected |= CATEGORY_MASK_ISO_7;
2533           if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_7_tight],
2534                               id))
2535             found |= CATEGORY_MASK_ISO_7_TIGHT;
2536           else
2537             rejected |= CATEGORY_MASK_ISO_7_TIGHT;
2538           if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_7_else],
2539                               id))
2540             found |= CATEGORY_MASK_ISO_7_ELSE;
2541           else
2542             rejected |= CATEGORY_MASK_ISO_7_ELSE;
2543           if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_8_else],
2544                               id))
2545             found |= CATEGORY_MASK_ISO_8_ELSE;
2546           else
2547             rejected |= CATEGORY_MASK_ISO_8_ELSE;
2548           break;
2549
2550         case ISO_CODE_SO:
2551         case ISO_CODE_SI:
2552           /* Locking shift out/in.  */
2553           if (inhibit_iso_escape_detection)
2554             break;
2555           single_shifting = 0;
2556           rejected |= CATEGORY_MASK_ISO_7BIT | CATEGORY_MASK_ISO_8BIT;
2557           found |= CATEGORY_MASK_ISO_ELSE;
2558           break;
2559
2560         case ISO_CODE_CSI:
2561           /* Control sequence introducer.  */
2562           single_shifting = 0;
2563           rejected |= CATEGORY_MASK_ISO_7BIT | CATEGORY_MASK_ISO_7_ELSE;
2564           found |= CATEGORY_MASK_ISO_8_ELSE;
2565           goto check_extra_latin;
2566
2567
2568         case ISO_CODE_SS2:
2569         case ISO_CODE_SS3:
2570           /* Single shift.   */
2571           if (inhibit_iso_escape_detection)
2572             break;
2573           single_shifting = 1;
2574           rejected |= CATEGORY_MASK_ISO_7BIT;
2575           if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_1])
2576               & CODING_ISO_FLAG_SINGLE_SHIFT)
2577             found |= CATEGORY_MASK_ISO_8_1;
2578           if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_2])
2579               & CODING_ISO_FLAG_SINGLE_SHIFT)
2580             found |= CATEGORY_MASK_ISO_8_2;
2581           goto check_extra_latin;
2582
2583         default:
2584           if (c < 0x80)
2585             {
2586               single_shifting = 0;
2587               break;
2588             }
2589           if (c >= 0xA0)
2590             {
2591               rejected |= CATEGORY_MASK_ISO_7BIT | CATEGORY_MASK_ISO_7_ELSE;
2592               found |= CATEGORY_MASK_ISO_8_1;
2593               /* Check the length of succeeding codes of the range
2594                  0xA0..0FF.  If the byte length is even, we include
2595                  CATEGORY_MASK_ISO_8_2 in `found'.  We can check this
2596                  only when we are not single shifting.  */
2597               if (! single_shifting
2598                   && ! (rejected & CATEGORY_MASK_ISO_8_2))
2599                 {
2600                   int i = 1;
2601                   while (src < src_end)
2602                     {
2603                       ONE_MORE_BYTE (c);
2604                       if (c < 0xA0)
2605                         break;
2606                       i++;
2607                     }
2608
2609                   if (i & 1 && src < src_end)
2610                     rejected |= CATEGORY_MASK_ISO_8_2;
2611                   else
2612                     found |= CATEGORY_MASK_ISO_8_2;
2613                 }
2614               break;
2615             }
2616         check_extra_latin:
2617           single_shifting = 0;
2618           if (! VECTORP (Vlatin_extra_code_table)
2619               || NILP (XVECTOR (Vlatin_extra_code_table)->contents[c]))
2620             {
2621               rejected = CATEGORY_MASK_ISO;
2622               break;
2623             }
2624           if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_1])
2625               & CODING_ISO_FLAG_LATIN_EXTRA)
2626             found |= CATEGORY_MASK_ISO_8_1;
2627           else
2628             rejected |= CATEGORY_MASK_ISO_8_1;
2629           if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_2])
2630               & CODING_ISO_FLAG_LATIN_EXTRA)
2631             found |= CATEGORY_MASK_ISO_8_2;
2632           else
2633             rejected |= CATEGORY_MASK_ISO_8_2;
2634         }
2635     }
2636   detect_info->rejected |= CATEGORY_MASK_ISO;
2637   return 0;
2638
2639  no_more_source:
2640   detect_info->rejected |= rejected;
2641   detect_info->found |= (found & ~rejected);
2642   return 1;
2643 }
2644
2645
2646 /* Set designation state into CODING.  */
2647 #define DECODE_DESIGNATION(reg, dim, chars_96, final)                   \
2648   do {                                                                  \
2649     int id, prev;                                                       \
2650                                                                         \
2651     if (final < '0' || final >= 128                                     \
2652         || ((id = ISO_CHARSET_TABLE (dim, chars_96, final)) < 0)        \
2653         || !SAFE_CHARSET_P (coding, id))                                \
2654       {                                                                 \
2655         CODING_ISO_DESIGNATION (coding, reg) = -2;                      \
2656         goto invalid_code;                                              \
2657       }                                                                 \
2658     prev = CODING_ISO_DESIGNATION (coding, reg);                        \
2659     if (id == charset_jisx0201_roman)                                   \
2660       {                                                                 \
2661         if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_USE_ROMAN)      \
2662           id = charset_ascii;                                           \
2663       }                                                                 \
2664     else if (id == charset_jisx0208_1978)                               \
2665       {                                                                 \
2666         if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_USE_OLDJIS)     \
2667           id = charset_jisx0208;                                        \
2668       }                                                                 \
2669     CODING_ISO_DESIGNATION (coding, reg) = id;                          \
2670     /* If there was an invalid designation to REG previously, and this  \
2671        designation is ASCII to REG, we should keep this designation     \
2672        sequence.  */                                                    \
2673     if (prev == -2 && id == charset_ascii)                              \
2674       goto invalid_code;                                                \
2675   } while (0)
2676
2677
2678 #define MAYBE_FINISH_COMPOSITION()                              \
2679   do {                                                          \
2680     int i;                                                      \
2681     if (composition_state == COMPOSING_NO)                      \
2682       break;                                                    \
2683     /* It is assured that we have enough room for producing     \
2684        characters stored in the table `components'.  */         \
2685     if (charbuf + component_idx > charbuf_end)                  \
2686       goto no_more_source;                                      \
2687     composition_state = COMPOSING_NO;                           \
2688     if (method == COMPOSITION_RELATIVE                          \
2689         || method == COMPOSITION_WITH_ALTCHARS)                 \
2690       {                                                         \
2691         for (i = 0; i < component_idx; i++)                     \
2692           *charbuf++ = components[i];                           \
2693         char_offset += component_idx;                           \
2694       }                                                         \
2695     else                                                        \
2696       {                                                         \
2697         for (i = 0; i < component_idx; i += 2)                  \
2698           *charbuf++ = components[i];                           \
2699         char_offset += (component_idx / 2) + 1;                 \
2700       }                                                         \
2701   } while (0)
2702
2703
2704 /* Handle composition start sequence ESC 0, ESC 2, ESC 3, or ESC 4.
2705    ESC 0 : relative composition : ESC 0 CHAR ... ESC 1
2706    ESC 2 : rulebase composition : ESC 2 CHAR RULE CHAR RULE ... CHAR ESC 1
2707    ESC 3 : altchar composition :  ESC 3 CHAR ... ESC 0 CHAR ... ESC 1
2708    ESC 4 : alt&rule composition : ESC 4 CHAR RULE ... CHAR ESC 0 CHAR ... ESC 1
2709   */
2710
2711 #define DECODE_COMPOSITION_START(c1)                                    \
2712   do {                                                                  \
2713     if (c1 == '0'                                                       \
2714         && composition_state == COMPOSING_COMPONENT_RULE)               \
2715       {                                                                 \
2716         component_len = component_idx;                                  \
2717         composition_state = COMPOSING_CHAR;                             \
2718       }                                                                 \
2719     else                                                                \
2720       {                                                                 \
2721         unsigned char *p;                                               \
2722                                                                         \
2723         MAYBE_FINISH_COMPOSITION ();                                    \
2724         if (charbuf + MAX_COMPOSITION_COMPONENTS > charbuf_end)         \
2725           goto no_more_source;                                          \
2726         for (p = src; p < src_end - 1; p++)                             \
2727           if (*p == ISO_CODE_ESC && p[1] == '1')                        \
2728             break;                                                      \
2729         if (p == src_end - 1)                                           \
2730           {                                                             \
2731             if (coding->mode & CODING_MODE_LAST_BLOCK)                  \
2732               goto invalid_code;                                        \
2733             goto no_more_source;                                        \
2734           }                                                             \
2735                                                                         \
2736         /* This is surely the start of a composition.  */               \
2737         method = (c1 == '0' ? COMPOSITION_RELATIVE                      \
2738                   : c1 == '2' ? COMPOSITION_WITH_RULE                   \
2739                   : c1 == '3' ? COMPOSITION_WITH_ALTCHARS               \
2740                   : COMPOSITION_WITH_RULE_ALTCHARS);                    \
2741         composition_state = (c1 <= '2' ? COMPOSING_CHAR                 \
2742                              : COMPOSING_COMPONENT_CHAR);               \
2743         component_idx = component_len = 0;                              \
2744       }                                                                 \
2745   } while (0)
2746
2747
2748 /* Handle compositoin end sequence ESC 1.  */
2749
2750 #define DECODE_COMPOSITION_END()                                        \
2751   do {                                                                  \
2752     int nchars = (component_len > 0 ? component_idx - component_len     \
2753                   : method == COMPOSITION_RELATIVE ? component_idx      \
2754                   : (component_idx + 1) / 2);                           \
2755     int i;                                                              \
2756     int *saved_charbuf = charbuf;                                       \
2757     int from = coding->produced_char + char_offset;                     \
2758     int to = from + nchars;                                             \
2759                                                                         \
2760     ADD_COMPOSITION_DATA (charbuf, from, to, method);                   \
2761     if (method != COMPOSITION_RELATIVE)                                 \
2762       {                                                                 \
2763         if (component_len == 0)                                         \
2764           for (i = 0; i < component_idx; i++)                           \
2765             *charbuf++ = components[i];                                 \
2766         else                                                            \
2767           for (i = 0; i < component_len; i++)                           \
2768             *charbuf++ = components[i];                                 \
2769         *saved_charbuf = saved_charbuf - charbuf;                       \
2770       }                                                                 \
2771     if (method == COMPOSITION_WITH_RULE)                                \
2772       for (i = 0; i < component_idx; i += 2, char_offset++)             \
2773         *charbuf++ = components[i];                                     \
2774     else                                                                \
2775       for (i = component_len; i < component_idx; i++, char_offset++)    \
2776         *charbuf++ = components[i];                                     \
2777     coding->annotated = 1;                                              \
2778     composition_state = COMPOSING_NO;                                   \
2779   } while (0)
2780
2781
2782 /* Decode a composition rule from the byte C1 (and maybe one more byte
2783    from SRC) and store one encoded composition rule in
2784    coding->cmp_data.  */
2785
2786 #define DECODE_COMPOSITION_RULE(c1)                                     \
2787   do {                                                                  \
2788     (c1) -= 32;                                                         \
2789     if (c1 < 81)                /* old format (before ver.21) */        \
2790       {                                                                 \
2791         int gref = (c1) / 9;                                            \
2792         int nref = (c1) % 9;                                            \
2793         if (gref == 4) gref = 10;                                       \
2794         if (nref == 4) nref = 10;                                       \
2795         c1 = COMPOSITION_ENCODE_RULE (gref, nref);                      \
2796       }                                                                 \
2797     else if (c1 < 93)           /* new format (after ver.21) */         \
2798       {                                                                 \
2799         ONE_MORE_BYTE (c2);                                             \
2800         c1 = COMPOSITION_ENCODE_RULE (c1 - 81, c2 - 32);                \
2801       }                                                                 \
2802     else                                                                \
2803       c1 = 0;                                                           \
2804   } while (0)
2805
2806
2807 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".  */
2808
2809 static void
2810 decode_coding_iso_2022 (coding)
2811      struct coding_system *coding;
2812 {
2813   unsigned char *src = coding->source + coding->consumed;
2814   unsigned char *src_end = coding->source + coding->src_bytes;
2815   unsigned char *src_base;
2816   int *charbuf = coding->charbuf;
2817   int *charbuf_end
2818     = charbuf + coding->charbuf_size - 4 - MAX_ANNOTATION_LENGTH;
2819   int consumed_chars = 0, consumed_chars_base;
2820   int multibytep = coding->src_multibyte;
2821   /* Charsets invoked to graphic plane 0 and 1 respectively.  */
2822   int charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
2823   int charset_id_1 = CODING_ISO_INVOKED_CHARSET (coding, 1);
2824   struct charset *charset;
2825   int c;
2826   /* For handling composition sequence.  */
2827 #define COMPOSING_NO                    0
2828 #define COMPOSING_CHAR                  1
2829 #define COMPOSING_RULE                  2
2830 #define COMPOSING_COMPONENT_CHAR        3
2831 #define COMPOSING_COMPONENT_RULE        4
2832
2833   int composition_state = COMPOSING_NO;
2834   enum composition_method method;
2835   int components[MAX_COMPOSITION_COMPONENTS * 2 + 1];
2836   int component_idx;
2837   int component_len;
2838   Lisp_Object attrs, eol_type, charset_list;
2839   int char_offset = coding->produced_char;
2840   int last_offset = char_offset;
2841   int last_id = charset_ascii;
2842
2843   CODING_GET_INFO (coding, attrs, eol_type, charset_list);
2844   setup_iso_safe_charsets (attrs);
2845
2846   while (1)
2847     {
2848       int c1, c2;
2849
2850       src_base = src;
2851       consumed_chars_base = consumed_chars;
2852
2853       if (charbuf >= charbuf_end)
2854         break;
2855
2856       ONE_MORE_BYTE (c1);
2857
2858       /* We produce at most one character.  */
2859       switch (iso_code_class [c1])
2860         {
2861         case ISO_0x20_or_0x7F:
2862           if (composition_state != COMPOSING_NO)
2863             {
2864               if (composition_state == COMPOSING_RULE
2865                   || composition_state == COMPOSING_COMPONENT_RULE)
2866                 {
2867                   DECODE_COMPOSITION_RULE (c1);
2868                   components[component_idx++] = c1;
2869                   composition_state--;
2870                   continue;
2871                 }
2872             }
2873           if (charset_id_0 < 0
2874               || ! CHARSET_ISO_CHARS_96 (CHARSET_FROM_ID (charset_id_0)))
2875             /* This is SPACE or DEL.  */
2876             charset = CHARSET_FROM_ID (charset_ascii);
2877           else
2878             charset = CHARSET_FROM_ID (charset_id_0);
2879           break;
2880
2881         case ISO_graphic_plane_0:
2882           if (composition_state != COMPOSING_NO)
2883             {
2884               if (composition_state == COMPOSING_RULE
2885                   || composition_state == COMPOSING_COMPONENT_RULE)
2886                 {
2887                   DECODE_COMPOSITION_RULE (c1);
2888                   components[component_idx++] = c1;
2889                   composition_state--;
2890                   continue;
2891                 }
2892             }
2893           charset = CHARSET_FROM_ID (charset_id_0);
2894           break;
2895
2896         case ISO_0xA0_or_0xFF:
2897           if (charset_id_1 < 0
2898               || ! CHARSET_ISO_CHARS_96 (CHARSET_FROM_ID (charset_id_1))
2899               || CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS)
2900             goto invalid_code;
2901           /* This is a graphic character, we fall down ... */
2902
2903         case ISO_graphic_plane_1:
2904           if (charset_id_1 < 0)
2905             goto invalid_code;
2906           charset = CHARSET_FROM_ID (charset_id_1);
2907           break;
2908
2909         case ISO_carriage_return:
2910           if (c1 == '\r')
2911             {
2912               if (EQ (eol_type, Qdos))
2913                 {
2914                   if (src == src_end)
2915                     {
2916                       coding->result = CODING_RESULT_INSUFFICIENT_SRC;
2917                       goto no_more_source;
2918                     }
2919                   if (*src == '\n')
2920                     ONE_MORE_BYTE (c1);
2921                 }
2922               else if (EQ (eol_type, Qmac))
2923                 c1 = '\n';
2924             }
2925           /* fall through */
2926
2927         case ISO_control_0:
2928           MAYBE_FINISH_COMPOSITION ();
2929           charset = CHARSET_FROM_ID (charset_ascii);
2930           break;
2931
2932         case ISO_control_1:
2933           MAYBE_FINISH_COMPOSITION ();
2934           goto invalid_code;
2935
2936         case ISO_shift_out:
2937           if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LOCKING_SHIFT)
2938               || CODING_ISO_DESIGNATION (coding, 1) < 0)
2939             goto invalid_code;
2940           CODING_ISO_INVOCATION (coding, 0) = 1;
2941           charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
2942           continue;
2943
2944         case ISO_shift_in:
2945           if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LOCKING_SHIFT))
2946             goto invalid_code;
2947           CODING_ISO_INVOCATION (coding, 0) = 0;
2948           charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
2949           continue;
2950
2951         case ISO_single_shift_2_7:
2952         case ISO_single_shift_2:
2953           if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT))
2954             goto invalid_code;
2955           /* SS2 is handled as an escape sequence of ESC 'N' */
2956           c1 = 'N';
2957           goto label_escape_sequence;
2958
2959         case ISO_single_shift_3:
2960           if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT))
2961             goto invalid_code;
2962           /* SS2 is handled as an escape sequence of ESC 'O' */
2963           c1 = 'O';
2964           goto label_escape_sequence;
2965
2966         case ISO_control_sequence_introducer:
2967           /* CSI is handled as an escape sequence of ESC '[' ...  */
2968           c1 = '[';
2969           goto label_escape_sequence;
2970
2971         case ISO_escape:
2972           ONE_MORE_BYTE (c1);
2973         label_escape_sequence:
2974           /* Escape sequences handled here are invocation,
2975              designation, direction specification, and character
2976              composition specification.  */
2977           switch (c1)
2978             {
2979             case '&':           /* revision of following character set */
2980               ONE_MORE_BYTE (c1);
2981               if (!(c1 >= '@' && c1 <= '~'))
2982                 goto invalid_code;
2983               ONE_MORE_BYTE (c1);
2984               if (c1 != ISO_CODE_ESC)
2985                 goto invalid_code;
2986               ONE_MORE_BYTE (c1);
2987               goto label_escape_sequence;
2988
2989             case '$':           /* designation of 2-byte character set */
2990               if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DESIGNATION))
2991                 goto invalid_code;
2992               ONE_MORE_BYTE (c1);
2993               if (c1 >= '@' && c1 <= 'B')
2994                 {       /* designation of JISX0208.1978, GB2312.1980,
2995                            or JISX0208.1980 */
2996                   DECODE_DESIGNATION (0, 2, 0, c1);
2997                 }
2998               else if (c1 >= 0x28 && c1 <= 0x2B)
2999                 {       /* designation of DIMENSION2_CHARS94 character set */
3000                   ONE_MORE_BYTE (c2);
3001                   DECODE_DESIGNATION (c1 - 0x28, 2, 0, c2);
3002                 }
3003               else if (c1 >= 0x2C && c1 <= 0x2F)
3004                 {       /* designation of DIMENSION2_CHARS96 character set */
3005                   ONE_MORE_BYTE (c2);
3006                   DECODE_DESIGNATION (c1 - 0x2C, 2, 1, c2);
3007                 }
3008               else
3009                 goto invalid_code;
3010               /* We must update these variables now.  */
3011               charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
3012               charset_id_1 = CODING_ISO_INVOKED_CHARSET (coding, 1);
3013               continue;
3014
3015             case 'n':           /* invocation of locking-shift-2 */
3016               if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LOCKING_SHIFT)
3017                   || CODING_ISO_DESIGNATION (coding, 2) < 0)
3018                 goto invalid_code;
3019               CODING_ISO_INVOCATION (coding, 0) = 2;
3020               charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
3021               continue;
3022
3023             case 'o':           /* invocation of locking-shift-3 */
3024               if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LOCKING_SHIFT)
3025                   || CODING_ISO_DESIGNATION (coding, 3) < 0)
3026                 goto invalid_code;
3027               CODING_ISO_INVOCATION (coding, 0) = 3;
3028               charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
3029               continue;
3030
3031             case 'N':           /* invocation of single-shift-2 */
3032               if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT)
3033                   || CODING_ISO_DESIGNATION (coding, 2) < 0)
3034                 goto invalid_code;
3035               charset = CHARSET_FROM_ID (CODING_ISO_DESIGNATION (coding, 2));
3036               ONE_MORE_BYTE (c1);
3037               if (c1 < 0x20 || (c1 >= 0x80 && c1 < 0xA0))
3038                 goto invalid_code;
3039               break;
3040
3041             case 'O':           /* invocation of single-shift-3 */
3042               if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT)
3043                   || CODING_ISO_DESIGNATION (coding, 3) < 0)
3044                 goto invalid_code;
3045               charset = CHARSET_FROM_ID (CODING_ISO_DESIGNATION (coding, 3));
3046               ONE_MORE_BYTE (c1);
3047               if (c1 < 0x20 || (c1 >= 0x80 && c1 < 0xA0))
3048                 goto invalid_code;
3049               break;
3050
3051             case '0': case '2': case '3': case '4': /* start composition */
3052               if (! (coding->common_flags & CODING_ANNOTATE_COMPOSITION_MASK))
3053                 goto invalid_code;
3054               DECODE_COMPOSITION_START (c1);
3055               continue;
3056
3057             case '1':           /* end composition */
3058               if (composition_state == COMPOSING_NO)
3059                 goto invalid_code;
3060               DECODE_COMPOSITION_END ();
3061               continue;
3062
3063             case '[':           /* specification of direction */
3064               if (! CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DIRECTION)
3065                 goto invalid_code;
3066               /* For the moment, nested direction is not supported.
3067                  So, `coding->mode & CODING_MODE_DIRECTION' zero means
3068                  left-to-right, and nozero means right-to-left.  */
3069               ONE_MORE_BYTE (c1);
3070               switch (c1)
3071                 {
3072                 case ']':       /* end of the current direction */
3073                   coding->mode &= ~CODING_MODE_DIRECTION;
3074
3075                 case '0':       /* end of the current direction */
3076                 case '1':       /* start of left-to-right direction */
3077                   ONE_MORE_BYTE (c1);
3078                   if (c1 == ']')
3079                     coding->mode &= ~CODING_MODE_DIRECTION;
3080                   else
3081                     goto invalid_code;
3082                   break;
3083
3084                 case '2':       /* start of right-to-left direction */
3085                   ONE_MORE_BYTE (c1);
3086                   if (c1 == ']')
3087                     coding->mode |= CODING_MODE_DIRECTION;
3088                   else
3089                     goto invalid_code;
3090                   break;
3091
3092                 default:
3093                   goto invalid_code;
3094                 }
3095               continue;
3096
3097             default:
3098               if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DESIGNATION))
3099                 goto invalid_code;
3100               if (c1 >= 0x28 && c1 <= 0x2B)
3101                 {       /* designation of DIMENSION1_CHARS94 character set */
3102                   ONE_MORE_BYTE (c2);
3103                   DECODE_DESIGNATION (c1 - 0x28, 1, 0, c2);
3104                 }
3105               else if (c1 >= 0x2C && c1 <= 0x2F)
3106                 {       /* designation of DIMENSION1_CHARS96 character set */
3107                   ONE_MORE_BYTE (c2);
3108                   DECODE_DESIGNATION (c1 - 0x2C, 1, 1, c2);
3109                 }
3110               else
3111                 goto invalid_code;
3112               /* We must update these variables now.  */
3113               charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
3114               charset_id_1 = CODING_ISO_INVOKED_CHARSET (coding, 1);
3115               continue;
3116             }
3117         }
3118
3119       if (charset->id != charset_ascii
3120           && last_id != charset->id)
3121         {
3122           if (last_id != charset_ascii)
3123             ADD_CHARSET_DATA (charbuf, last_offset, char_offset, last_id);
3124           last_id = charset->id;
3125           last_offset = char_offset;
3126         }
3127
3128       /* Now we know CHARSET and 1st position code C1 of a character.
3129          Produce a decoded character while getting 2nd position code
3130          C2 if necessary.  */
3131       c1 &= 0x7F;
3132       if (CHARSET_DIMENSION (charset) > 1)
3133         {
3134           ONE_MORE_BYTE (c2);
3135           if (c2 < 0x20 || (c2 >= 0x80 && c2 < 0xA0))
3136             /* C2 is not in a valid range.  */
3137             goto invalid_code;
3138           c1 = (c1 << 8) | (c2 & 0x7F);
3139           if (CHARSET_DIMENSION (charset) > 2)
3140             {
3141               ONE_MORE_BYTE (c2);
3142               if (c2 < 0x20 || (c2 >= 0x80 && c2 < 0xA0))
3143                 /* C2 is not in a valid range.  */
3144                 goto invalid_code;
3145               c1 = (c1 << 8) | (c2 & 0x7F);
3146             }
3147         }
3148
3149       CODING_DECODE_CHAR (coding, src, src_base, src_end, charset, c1, c);
3150       if (c < 0)
3151         {
3152           MAYBE_FINISH_COMPOSITION ();
3153           for (; src_base < src; src_base++, char_offset++)
3154             {
3155               if (ASCII_BYTE_P (*src_base))
3156                 *charbuf++ = *src_base;
3157               else
3158                 *charbuf++ = BYTE8_TO_CHAR (*src_base);
3159               char_offset++;
3160             }
3161         }
3162       else if (composition_state == COMPOSING_NO)
3163         {
3164           *charbuf++ = c;
3165           char_offset++;
3166         }
3167       else
3168         {
3169           components[component_idx++] = c;
3170           if (method == COMPOSITION_WITH_RULE
3171               || (method == COMPOSITION_WITH_RULE_ALTCHARS
3172                   && composition_state == COMPOSING_COMPONENT_CHAR))
3173             composition_state++;
3174         }
3175       continue;
3176
3177     invalid_code:
3178       MAYBE_FINISH_COMPOSITION ();
3179       src = src_base;
3180       consumed_chars = consumed_chars_base;
3181       ONE_MORE_BYTE (c);
3182       *charbuf++ = ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
3183       char_offset++;
3184       coding->errors++;
3185     }
3186
3187  no_more_source:
3188   if (last_id != charset_ascii)
3189     ADD_CHARSET_DATA (charbuf, last_offset, char_offset, last_id);
3190   coding->consumed_char += consumed_chars_base;
3191   coding->consumed = src_base - coding->source;
3192   coding->charbuf_used = charbuf - coding->charbuf;
3193 }
3194
3195
3196 /* ISO2022 encoding stuff.  */
3197
3198 /*
3199    It is not enough to say just "ISO2022" on encoding, we have to
3200    specify more details.  In Emacs, each coding system of ISO2022
3201    variant has the following specifications:
3202         1. Initial designation to G0 thru G3.
3203         2. Allows short-form designation?
3204         3. ASCII should be designated to G0 before control characters?
3205         4. ASCII should be designated to G0 at end of line?
3206         5. 7-bit environment or 8-bit environment?
3207         6. Use locking-shift?
3208         7. Use Single-shift?
3209    And the following two are only for Japanese:
3210         8. Use ASCII in place of JIS0201-1976-Roman?
3211         9. Use JISX0208-1983 in place of JISX0208-1978?
3212    These specifications are encoded in CODING_ISO_FLAGS (coding) as flag bits
3213    defined by macros CODING_ISO_FLAG_XXX.  See `coding.h' for more
3214    details.
3215 */
3216
3217 /* Produce codes (escape sequence) for designating CHARSET to graphic
3218    register REG at DST, and increment DST.  If <final-char> of CHARSET is
3219    '@', 'A', or 'B' and the coding system CODING allows, produce
3220    designation sequence of short-form.  */
3221
3222 #define ENCODE_DESIGNATION(charset, reg, coding)                        \
3223   do {                                                                  \
3224     unsigned char final_char = CHARSET_ISO_FINAL (charset);             \
3225     char *intermediate_char_94 = "()*+";                                \
3226     char *intermediate_char_96 = ",-./";                                \
3227     int revision = -1;                                                  \
3228     int c;                                                              \
3229                                                                         \
3230     if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_REVISION)           \
3231       revision = CHARSET_ISO_REVISION (charset);                        \
3232                                                                         \
3233     if (revision >= 0)                                                  \
3234       {                                                                 \
3235         EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, '&');                       \
3236         EMIT_ONE_BYTE ('@' + revision);                                 \
3237       }                                                                 \
3238     EMIT_ONE_ASCII_BYTE (ISO_CODE_ESC);                                 \
3239     if (CHARSET_DIMENSION (charset) == 1)                               \
3240       {                                                                 \
3241         if (! CHARSET_ISO_CHARS_96 (charset))                           \
3242           c = intermediate_char_94[reg];                                \
3243         else                                                            \
3244           c = intermediate_char_96[reg];                                \
3245         EMIT_ONE_ASCII_BYTE (c);                                        \
3246       }                                                                 \
3247     else                                                                \
3248       {                                                                 \
3249         EMIT_ONE_ASCII_BYTE ('$');                                      \
3250         if (! CHARSET_ISO_CHARS_96 (charset))                           \
3251           {                                                             \
3252             if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LONG_FORM   \
3253                 || reg != 0                                             \
3254                 || final_char < '@' || final_char > 'B')                \
3255               EMIT_ONE_ASCII_BYTE (intermediate_char_94[reg]);          \
3256           }                                                             \
3257         else                                                            \
3258           EMIT_ONE_ASCII_BYTE (intermediate_char_96[reg]);              \
3259       }                                                                 \
3260     EMIT_ONE_ASCII_BYTE (final_char);                                   \
3261                                                                         \
3262     CODING_ISO_DESIGNATION (coding, reg) = CHARSET_ID (charset);        \
3263   } while (0)
3264
3265
3266 /* The following two macros produce codes (control character or escape
3267    sequence) for ISO2022 single-shift functions (single-shift-2 and
3268    single-shift-3).  */
3269
3270 #define ENCODE_SINGLE_SHIFT_2                                           \
3271   do {                                                                  \
3272     if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS)         \
3273       EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'N');                         \
3274     else                                                                \
3275       EMIT_ONE_BYTE (ISO_CODE_SS2);                                     \
3276     CODING_ISO_SINGLE_SHIFTING (coding) = 1;                            \
3277   } while (0)
3278
3279
3280 #define ENCODE_SINGLE_SHIFT_3                                           \
3281   do {                                                                  \
3282     if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS)         \
3283       EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'O');                         \
3284     else                                                                \
3285       EMIT_ONE_BYTE (ISO_CODE_SS3);                                     \
3286     CODING_ISO_SINGLE_SHIFTING (coding) = 1;                            \
3287   } while (0)
3288
3289
3290 /* The following four macros produce codes (control character or
3291    escape sequence) for ISO2022 locking-shift functions (shift-in,
3292    shift-out, locking-shift-2, and locking-shift-3).  */
3293
3294 #define ENCODE_SHIFT_IN                                 \
3295   do {                                                  \
3296     EMIT_ONE_ASCII_BYTE (ISO_CODE_SI);                  \
3297     CODING_ISO_INVOCATION (coding, 0) = 0;              \
3298   } while (0)
3299
3300
3301 #define ENCODE_SHIFT_OUT                                \
3302   do {                                                  \
3303     EMIT_ONE_ASCII_BYTE (ISO_CODE_SO);                  \
3304     CODING_ISO_INVOCATION (coding, 0) = 1;              \
3305   } while (0)
3306
3307
3308 #define ENCODE_LOCKING_SHIFT_2                          \
3309   do {                                                  \
3310     EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'n');           \
3311     CODING_ISO_INVOCATION (coding, 0) = 2;              \
3312   } while (0)
3313
3314
3315 #define ENCODE_LOCKING_SHIFT_3                          \
3316   do {                                                  \
3317     EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'n');           \
3318     CODING_ISO_INVOCATION (coding, 0) = 3;              \
3319   } while (0)
3320
3321
3322 /* Produce codes for a DIMENSION1 character whose character set is
3323    CHARSET and whose position-code is C1.  Designation and invocation
3324    sequences are also produced in advance if necessary.  */
3325
3326 #define ENCODE_ISO_CHARACTER_DIMENSION1(charset, c1)                    \
3327   do {                                                                  \
3328     int id = CHARSET_ID (charset);                                      \
3329                                                                         \
3330     if ((CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_USE_ROMAN)         \
3331         && id == charset_ascii)                                         \
3332       {                                                                 \
3333         id = charset_jisx0201_roman;                                    \
3334         charset = CHARSET_FROM_ID (id);                                 \
3335       }                                                                 \
3336                                                                         \
3337     if (CODING_ISO_SINGLE_SHIFTING (coding))                            \
3338       {                                                                 \
3339         if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS)     \
3340           EMIT_ONE_ASCII_BYTE (c1 & 0x7F);                              \
3341         else                                                            \
3342           EMIT_ONE_BYTE (c1 | 0x80);                                    \
3343         CODING_ISO_SINGLE_SHIFTING (coding) = 0;                        \
3344         break;                                                          \
3345       }                                                                 \
3346     else if (id == CODING_ISO_INVOKED_CHARSET (coding, 0))              \
3347       {                                                                 \
3348         EMIT_ONE_ASCII_BYTE (c1 & 0x7F);                                \
3349         break;                                                          \
3350       }                                                                 \
3351     else if (id == CODING_ISO_INVOKED_CHARSET (coding, 1))              \
3352       {                                                                 \
3353         EMIT_ONE_BYTE (c1 | 0x80);                                      \
3354         break;                                                          \
3355       }                                                                 \
3356     else                                                                \
3357       /* Since CHARSET is not yet invoked to any graphic planes, we     \
3358          must invoke it, or, at first, designate it to some graphic     \
3359          register.  Then repeat the loop to actually produce the        \
3360          character.  */                                                 \
3361       dst = encode_invocation_designation (charset, coding, dst,        \
3362                                            &produced_chars);            \
3363   } while (1)
3364
3365
3366 /* Produce codes for a DIMENSION2 character whose character set is
3367    CHARSET and whose position-codes are C1 and C2.  Designation and
3368    invocation codes are also produced in advance if necessary.  */
3369
3370 #define ENCODE_ISO_CHARACTER_DIMENSION2(charset, c1, c2)                \
3371   do {                                                                  \
3372     int id = CHARSET_ID (charset);                                      \
3373                                                                         \
3374     if ((CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_USE_OLDJIS)        \
3375         && id == charset_jisx0208)                                      \
3376       {                                                                 \
3377         id = charset_jisx0208_1978;                                     \
3378         charset = CHARSET_FROM_ID (id);                                 \
3379       }                                                                 \
3380                                                                         \
3381     if (CODING_ISO_SINGLE_SHIFTING (coding))                            \
3382       {                                                                 \
3383         if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS)     \
3384           EMIT_TWO_ASCII_BYTES ((c1) & 0x7F, (c2) & 0x7F);              \
3385         else                                                            \
3386           EMIT_TWO_BYTES ((c1) | 0x80, (c2) | 0x80);                    \
3387         CODING_ISO_SINGLE_SHIFTING (coding) = 0;                        \
3388         break;                                                          \
3389       }                                                                 \
3390     else if (id == CODING_ISO_INVOKED_CHARSET (coding, 0))              \
3391       {                                                                 \
3392         EMIT_TWO_ASCII_BYTES ((c1) & 0x7F, (c2) & 0x7F);                \
3393         break;                                                          \
3394       }                                                                 \
3395     else if (id == CODING_ISO_INVOKED_CHARSET (coding, 1))              \
3396       {                                                                 \
3397         EMIT_TWO_BYTES ((c1) | 0x80, (c2) | 0x80);                      \
3398         break;                                                          \
3399       }                                                                 \
3400     else                                                                \
3401       /* Since CHARSET is not yet invoked to any graphic planes, we     \
3402          must invoke it, or, at first, designate it to some graphic     \
3403          register.  Then repeat the loop to actually produce the        \
3404          character.  */                                                 \
3405       dst = encode_invocation_designation (charset, coding, dst,        \
3406                                            &produced_chars);            \
3407   } while (1)
3408
3409
3410 #define ENCODE_ISO_CHARACTER(charset, c)                                   \
3411   do {                                                                     \
3412     int code = ENCODE_CHAR ((charset),(c));                                \
3413                                                                            \
3414     if (CHARSET_DIMENSION (charset) == 1)                                  \
3415       ENCODE_ISO_CHARACTER_DIMENSION1 ((charset), code);                   \
3416     else                                                                   \
3417       ENCODE_ISO_CHARACTER_DIMENSION2 ((charset), code >> 8, code & 0xFF); \
3418   } while (0)
3419
3420
3421 /* Produce designation and invocation codes at a place pointed by DST
3422    to use CHARSET.  The element `spec.iso_2022' of *CODING is updated.
3423    Return new DST.  */
3424
3425 unsigned char *
3426 encode_invocation_designation (charset, coding, dst, p_nchars)
3427      struct charset *charset;
3428      struct coding_system *coding;
3429      unsigned char *dst;
3430      int *p_nchars;
3431 {
3432   int multibytep = coding->dst_multibyte;
3433   int produced_chars = *p_nchars;
3434   int reg;                      /* graphic register number */
3435   int id = CHARSET_ID (charset);
3436
3437   /* At first, check designations.  */
3438   for (reg = 0; reg < 4; reg++)
3439     if (id == CODING_ISO_DESIGNATION (coding, reg))
3440       break;
3441
3442   if (reg >= 4)
3443     {
3444       /* CHARSET is not yet designated to any graphic registers.  */
3445       /* At first check the requested designation.  */
3446       reg = CODING_ISO_REQUEST (coding, id);
3447       if (reg < 0)
3448         /* Since CHARSET requests no special designation, designate it
3449            to graphic register 0.  */
3450         reg = 0;
3451
3452       ENCODE_DESIGNATION (charset, reg, coding);
3453     }
3454
3455   if (CODING_ISO_INVOCATION (coding, 0) != reg
3456       && CODING_ISO_INVOCATION (coding, 1) != reg)
3457     {
3458       /* Since the graphic register REG is not invoked to any graphic
3459          planes, invoke it to graphic plane 0.  */
3460       switch (reg)
3461         {
3462         case 0:                 /* graphic register 0 */
3463           ENCODE_SHIFT_IN;
3464           break;
3465
3466         case 1:                 /* graphic register 1 */
3467           ENCODE_SHIFT_OUT;
3468           break;
3469
3470         case 2:                 /* graphic register 2 */
3471           if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT)
3472             ENCODE_SINGLE_SHIFT_2;
3473           else
3474             ENCODE_LOCKING_SHIFT_2;
3475           break;
3476
3477         case 3:                 /* graphic register 3 */
3478           if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT)
3479             ENCODE_SINGLE_SHIFT_3;
3480           else
3481             ENCODE_LOCKING_SHIFT_3;
3482           break;
3483         }
3484     }
3485
3486   *p_nchars = produced_chars;
3487   return dst;
3488 }
3489
3490 /* The following three macros produce codes for indicating direction
3491    of text.  */
3492 #define ENCODE_CONTROL_SEQUENCE_INTRODUCER                              \
3493   do {                                                                  \
3494     if (CODING_ISO_FLAGS (coding) == CODING_ISO_FLAG_SEVEN_BITS)        \
3495       EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, '[');                         \
3496     else                                                                \
3497       EMIT_ONE_BYTE (ISO_CODE_CSI);                                     \
3498   } while (0)
3499
3500
3501 #define ENCODE_DIRECTION_R2L()                  \
3502   do {                                          \
3503     ENCODE_CONTROL_SEQUENCE_INTRODUCER (dst);   \
3504     EMIT_TWO_ASCII_BYTES ('2', ']');            \
3505   } while (0)
3506
3507
3508 #define ENCODE_DIRECTION_L2R()                  \
3509   do {                                          \
3510     ENCODE_CONTROL_SEQUENCE_INTRODUCER (dst);   \
3511     EMIT_TWO_ASCII_BYTES ('0', ']');            \
3512   } while (0)
3513
3514
3515 /* Produce codes for designation and invocation to reset the graphic
3516    planes and registers to initial state.  */
3517 #define ENCODE_RESET_PLANE_AND_REGISTER()                               \
3518   do {                                                                  \
3519     int reg;                                                            \
3520     struct charset *charset;                                            \
3521                                                                         \
3522     if (CODING_ISO_INVOCATION (coding, 0) != 0)                         \
3523       ENCODE_SHIFT_IN;                                                  \
3524     for (reg = 0; reg < 4; reg++)                                       \
3525       if (CODING_ISO_INITIAL (coding, reg) >= 0                         \
3526           && (CODING_ISO_DESIGNATION (coding, reg)                      \
3527               != CODING_ISO_INITIAL (coding, reg)))                     \
3528         {                                                               \
3529           charset = CHARSET_FROM_ID (CODING_ISO_INITIAL (coding, reg)); \
3530           ENCODE_DESIGNATION (charset, reg, coding);                    \
3531         }                                                               \
3532   } while (0)
3533
3534
3535 /* Produce designation sequences of charsets in the line started from
3536    SRC to a place pointed by DST, and return updated DST.
3537
3538    If the current block ends before any end-of-line, we may fail to
3539    find all the necessary designations.  */
3540
3541 static unsigned char *
3542 encode_designation_at_bol (coding, charbuf, charbuf_end, dst)
3543      struct coding_system *coding;
3544      int *charbuf, *charbuf_end;
3545      unsigned char *dst;
3546 {
3547   struct charset *charset;
3548   /* Table of charsets to be designated to each graphic register.  */
3549   int r[4];
3550   int c, found = 0, reg;
3551   int produced_chars = 0;
3552   int multibytep = coding->dst_multibyte;
3553   Lisp_Object attrs;
3554   Lisp_Object charset_list;
3555
3556   attrs = CODING_ID_ATTRS (coding->id);
3557   charset_list = CODING_ATTR_CHARSET_LIST (attrs);
3558   if (EQ (charset_list, Qiso_2022))
3559     charset_list = Viso_2022_charset_list;
3560
3561   for (reg = 0; reg < 4; reg++)
3562     r[reg] = -1;
3563
3564   while (found < 4)
3565     {
3566       int id;
3567
3568       c = *charbuf++;
3569       if (c == '\n')
3570         break;
3571       charset = char_charset (c, charset_list, NULL);
3572       id = CHARSET_ID (charset);
3573       reg = CODING_ISO_REQUEST (coding, id);
3574       if (reg >= 0 && r[reg] < 0)
3575         {
3576           found++;
3577           r[reg] = id;
3578         }
3579     }
3580
3581   if (found)
3582     {
3583       for (reg = 0; reg < 4; reg++)
3584         if (r[reg] >= 0
3585             && CODING_ISO_DESIGNATION (coding, reg) != r[reg])
3586           ENCODE_DESIGNATION (CHARSET_FROM_ID (r[reg]), reg, coding);
3587     }
3588
3589   return dst;
3590 }
3591
3592 /* See the above "GENERAL NOTES on `encode_coding_XXX ()' functions".  */
3593
3594 static int
3595 encode_coding_iso_2022 (coding)
3596      struct coding_system *coding;
3597 {
3598   int multibytep = coding->dst_multibyte;
3599   int *charbuf = coding->charbuf;
3600   int *charbuf_end = charbuf + coding->charbuf_used;
3601   unsigned char *dst = coding->destination + coding->produced;
3602   unsigned char *dst_end = coding->destination + coding->dst_bytes;
3603   int safe_room = 16;
3604   int bol_designation
3605     = (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DESIGNATE_AT_BOL
3606        && CODING_ISO_BOL (coding));
3607   int produced_chars = 0;
3608   Lisp_Object attrs, eol_type, charset_list;
3609   int ascii_compatible;
3610   int c;
3611   int preferred_charset_id = -1;
3612
3613   CODING_GET_INFO (coding, attrs, eol_type, charset_list);
3614   setup_iso_safe_charsets (attrs);
3615   /* Charset list may have been changed.  */
3616   charset_list = CODING_ATTR_CHARSET_LIST (attrs);              \
3617   coding->safe_charsets
3618     = (char *) XSTRING (CODING_ATTR_SAFE_CHARSETS(attrs))->data;
3619
3620   ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
3621
3622   while (charbuf < charbuf_end)
3623     {
3624       ASSURE_DESTINATION (safe_room);
3625
3626       if (bol_designation)
3627         {
3628           unsigned char *dst_prev = dst;
3629
3630           /* We have to produce designation sequences if any now.  */
3631           dst = encode_designation_at_bol (coding, charbuf, charbuf_end, dst);
3632           bol_designation = 0;
3633           /* We are sure that designation sequences are all ASCII bytes.  */
3634           produced_chars += dst - dst_prev;
3635         }
3636
3637       c = *charbuf++;
3638
3639       if (c < 0)
3640         {
3641           /* Handle an annotation.  */
3642           switch (*charbuf)
3643             {
3644             case CODING_ANNOTATE_COMPOSITION_MASK:
3645               /* Not yet implemented.  */
3646               break;
3647             case CODING_ANNOTATE_CHARSET_MASK:
3648               preferred_charset_id = charbuf[3];
3649               if (preferred_charset_id >= 0
3650                   && NILP (Fmemq (make_number (preferred_charset_id),
3651                                   charset_list)))
3652                 preferred_charset_id = -1;
3653               break;
3654             default:
3655               abort ();
3656             }
3657           charbuf += -c - 1;
3658           continue;
3659         }
3660
3661       /* Now encode the character C.  */
3662       if (c < 0x20 || c == 0x7F)
3663         {
3664           if (c == '\n'
3665               || (c == '\r' && EQ (eol_type, Qmac)))
3666             {
3667               if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_RESET_AT_EOL)
3668                 ENCODE_RESET_PLANE_AND_REGISTER ();
3669               if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_INIT_AT_BOL)
3670                 {
3671                   int i;
3672
3673                   for (i = 0; i < 4; i++)
3674                     CODING_ISO_DESIGNATION (coding, i)
3675                       = CODING_ISO_INITIAL (coding, i);
3676                 }
3677               bol_designation
3678                 = CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DESIGNATE_AT_BOL;
3679             }
3680           else if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_RESET_AT_CNTL)
3681             ENCODE_RESET_PLANE_AND_REGISTER ();
3682           EMIT_ONE_ASCII_BYTE (c);
3683         }
3684       else if (ASCII_CHAR_P (c))
3685         {
3686           if (ascii_compatible)
3687             EMIT_ONE_ASCII_BYTE (c);
3688           else
3689             {
3690               struct charset *charset = CHARSET_FROM_ID (charset_ascii);
3691               ENCODE_ISO_CHARACTER (charset, c);
3692             }
3693         }
3694       else if (CHAR_BYTE8_P (c))
3695         {
3696           c = CHAR_TO_BYTE8 (c);
3697           EMIT_ONE_BYTE (c);
3698         }
3699       else
3700         {
3701           struct charset *charset;
3702
3703           if (preferred_charset_id >= 0)
3704             {
3705               charset = CHARSET_FROM_ID (preferred_charset_id);
3706               if (! CHAR_CHARSET_P (c, charset))
3707                 charset = char_charset (c, charset_list, NULL);
3708             }
3709           else
3710             charset = char_charset (c, charset_list, NULL);
3711           if (!charset)
3712             {
3713               if (coding->mode & CODING_MODE_SAFE_ENCODING)
3714                 {
3715                   c = CODING_INHIBIT_CHARACTER_SUBSTITUTION;
3716                   charset = CHARSET_FROM_ID (charset_ascii);
3717                 }
3718               else
3719                 {
3720                   c = coding->default_char;
3721                   charset = char_charset (c, charset_list, NULL);
3722                 }
3723             }
3724           ENCODE_ISO_CHARACTER (charset, c);
3725         }
3726     }
3727
3728   if (coding->mode & CODING_MODE_LAST_BLOCK
3729       && CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_RESET_AT_EOL)
3730     {
3731       ASSURE_DESTINATION (safe_room);
3732       ENCODE_RESET_PLANE_AND_REGISTER ();
3733     }
3734   coding->result = CODING_RESULT_SUCCESS;
3735   CODING_ISO_BOL (coding) = bol_designation;
3736   coding->produced_char += produced_chars;
3737   coding->produced = dst - coding->destination;
3738   return 0;
3739 }
3740
3741 \f
3742 /*** 8,9. SJIS and BIG5 handlers ***/
3743
3744 /* Although SJIS and BIG5 are not ISO's coding system, they are used
3745    quite widely.  So, for the moment, Emacs supports them in the bare
3746    C code.  But, in the future, they may be supported only by CCL.  */
3747
3748 /* SJIS is a coding system encoding three character sets: ASCII, right
3749    half of JISX0201-Kana, and JISX0208.  An ASCII character is encoded
3750    as is.  A character of charset katakana-jisx0201 is encoded by
3751    "position-code + 0x80".  A character of charset japanese-jisx0208
3752    is encoded in 2-byte but two position-codes are divided and shifted
3753    so that it fit in the range below.
3754
3755    --- CODE RANGE of SJIS ---
3756    (character set)      (range)
3757    ASCII                0x00 .. 0x7F
3758    KATAKANA-JISX0201    0xA0 .. 0xDF
3759    JISX0208 (1st byte)  0x81 .. 0x9F and 0xE0 .. 0xEF
3760             (2nd byte)  0x40 .. 0x7E and 0x80 .. 0xFC
3761    -------------------------------
3762
3763 */
3764
3765 /* BIG5 is a coding system encoding two character sets: ASCII and
3766    Big5.  An ASCII character is encoded as is.  Big5 is a two-byte
3767    character set and is encoded in two-byte.
3768
3769    --- CODE RANGE of BIG5 ---
3770    (character set)      (range)
3771    ASCII                0x00 .. 0x7F
3772    Big5 (1st byte)      0xA1 .. 0xFE
3773         (2nd byte)      0x40 .. 0x7E and 0xA1 .. 0xFE
3774    --------------------------
3775
3776   */
3777
3778 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
3779    Check if a text is encoded in SJIS.  If it is, return
3780    CATEGORY_MASK_SJIS, else return 0.  */
3781
3782 static int
3783 detect_coding_sjis (coding, detect_info)
3784      struct coding_system *coding;
3785      struct coding_detection_info *detect_info;
3786 {
3787   unsigned char *src = coding->source, *src_base = src;
3788   unsigned char *src_end = coding->source + coding->src_bytes;
3789   int multibytep = coding->src_multibyte;
3790   int consumed_chars = 0;
3791   int found = 0;
3792   int c;
3793   int incomplete;
3794
3795   detect_info->checked |= CATEGORY_MASK_SJIS;
3796   /* A coding system of this category is always ASCII compatible.  */
3797   src += coding->head_ascii;
3798
3799   while (1)
3800     {
3801       incomplete = 0;
3802       ONE_MORE_BYTE (c);
3803       incomplete = 1;
3804       if (c < 0x80)
3805         continue;
3806       if ((c >= 0x81 && c <= 0x9F) || (c >= 0xE0 && c <= 0xEF))
3807         {
3808           ONE_MORE_BYTE (c);
3809           if (c < 0x40 || c == 0x7F || c > 0xFC)
3810             break;
3811           found = CATEGORY_MASK_SJIS;
3812         }
3813       else if (c >= 0xA0 && c < 0xE0)
3814         found = CATEGORY_MASK_SJIS;
3815       else
3816         break;
3817     }
3818   detect_info->rejected |= CATEGORY_MASK_SJIS;
3819   return 0;
3820
3821  no_more_source:
3822   if (incomplete && coding->mode & CODING_MODE_LAST_BLOCK)
3823     {
3824       detect_info->rejected |= CATEGORY_MASK_SJIS;
3825       return 0;
3826     }
3827   detect_info->found |= found;
3828   return 1;
3829 }
3830
3831 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
3832    Check if a text is encoded in BIG5.  If it is, return
3833    CATEGORY_MASK_BIG5, else return 0.  */
3834
3835 static int
3836 detect_coding_big5 (coding, detect_info)
3837      struct coding_system *coding;
3838      struct coding_detection_info *detect_info;
3839 {
3840   unsigned char *src = coding->source, *src_base = src;
3841   unsigned char *src_end = coding->source + coding->src_bytes;
3842   int multibytep = coding->src_multibyte;
3843   int consumed_chars = 0;
3844   int found = 0;
3845   int c;
3846   int incomplete;
3847
3848   detect_info->checked |= CATEGORY_MASK_BIG5;
3849   /* A coding system of this category is always ASCII compatible.  */
3850   src += coding->head_ascii;
3851
3852   while (1)
3853     {
3854       incomplete = 0;
3855       ONE_MORE_BYTE (c);
3856       incomplete = 1;
3857       if (c < 0x80)
3858         continue;
3859       if (c >= 0xA1)
3860         {
3861           ONE_MORE_BYTE (c);
3862           if (c < 0x40 || (c >= 0x7F && c <= 0xA0))
3863             return 0;
3864           found = CATEGORY_MASK_BIG5;
3865         }
3866       else
3867         break;
3868     }
3869   detect_info->rejected |= CATEGORY_MASK_BIG5;
3870   return 0;
3871
3872  no_more_source:
3873   if (incomplete && coding->mode & CODING_MODE_LAST_BLOCK)
3874     {
3875       detect_info->rejected |= CATEGORY_MASK_BIG5;
3876       return 0;
3877     }
3878   detect_info->found |= found;
3879   return 1;
3880 }
3881
3882 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".
3883    If SJIS_P is 1, decode SJIS text, else decode BIG5 test.  */
3884
3885 static void
3886 decode_coding_sjis (coding)
3887      struct coding_system *coding;
3888 {
3889   unsigned char *src = coding->source + coding->consumed;
3890   unsigned char *src_end = coding->source + coding->src_bytes;
3891   unsigned char *src_base;
3892   int *charbuf = coding->charbuf;
3893   int *charbuf_end = charbuf + coding->charbuf_size - MAX_ANNOTATION_LENGTH;
3894   int consumed_chars = 0, consumed_chars_base;
3895   int multibytep = coding->src_multibyte;
3896   struct charset *charset_roman, *charset_kanji, *charset_kana;
3897   Lisp_Object attrs, eol_type, charset_list, val;
3898   int char_offset = coding->produced_char;
3899   int last_offset = char_offset;
3900   int last_id = charset_ascii;
3901
3902   CODING_GET_INFO (coding, attrs, eol_type, charset_list);
3903
3904   val = charset_list;
3905   charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
3906   charset_kana = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
3907   charset_kanji = CHARSET_FROM_ID (XINT (XCAR (val)));
3908
3909   while (1)
3910     {
3911       int c, c1;
3912
3913       src_base = src;
3914       consumed_chars_base = consumed_chars;
3915
3916       if (charbuf >= charbuf_end)
3917         break;
3918
3919       ONE_MORE_BYTE (c);
3920
3921       if (c == '\r')
3922         {
3923           if (EQ (eol_type, Qdos))
3924             {
3925               if (src == src_end)
3926                 {
3927                   coding->result = CODING_RESULT_INSUFFICIENT_SRC;
3928                   goto no_more_source;
3929                 }
3930               if (*src == '\n')
3931                 ONE_MORE_BYTE (c);
3932             }
3933           else if (EQ (eol_type, Qmac))
3934             c = '\n';
3935         }
3936       else
3937         {
3938           struct charset *charset;
3939
3940           if (c < 0x80)
3941             charset = charset_roman;
3942           else
3943             {
3944               if (c >= 0xF0)
3945                 goto invalid_code;
3946               if (c < 0xA0 || c >= 0xE0)
3947                 {
3948                   /* SJIS -> JISX0208 */
3949                   ONE_MORE_BYTE (c1);
3950                   if (c1 < 0x40 || c1 == 0x7F || c1 > 0xFC)
3951                     goto invalid_code;
3952                   c = (c << 8) | c1;
3953                   SJIS_TO_JIS (c);
3954                   charset = charset_kanji;
3955                 }
3956               else
3957                 {
3958                   /* SJIS -> JISX0201-Kana */
3959                   c &= 0x7F;
3960                   charset = charset_kana;
3961                 }
3962             }
3963           if (charset->id != charset_ascii
3964               && last_id != charset->id)
3965             {
3966               if (last_id != charset_ascii)
3967                 ADD_CHARSET_DATA (charbuf, last_offset, char_offset, last_id);
3968               last_id = charset->id;
3969               last_offset = char_offset;
3970             }
3971           CODING_DECODE_CHAR (coding, src, src_base, src_end, charset, c, c);
3972         }
3973       *charbuf++ = c;
3974       char_offset++;
3975       continue;
3976
3977     invalid_code:
3978       src = src_base;
3979       consumed_chars = consumed_chars_base;
3980       ONE_MORE_BYTE (c);
3981       *charbuf++ = ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
3982       char_offset++;
3983       coding->errors++;
3984     }
3985
3986  no_more_source:
3987   if (last_id != charset_ascii)
3988     ADD_CHARSET_DATA (charbuf, last_offset, char_offset, last_id);
3989   coding->consumed_char += consumed_chars_base;
3990   coding->consumed = src_base - coding->source;
3991   coding->charbuf_used = charbuf - coding->charbuf;
3992 }
3993
3994 static void
3995 decode_coding_big5 (coding)
3996      struct coding_system *coding;
3997 {
3998   unsigned char *src = coding->source + coding->consumed;
3999   unsigned char *src_end = coding->source + coding->src_bytes;
4000   unsigned char *src_base;
4001   int *charbuf = coding->charbuf;
4002   int *charbuf_end = charbuf + coding->charbuf_size - MAX_ANNOTATION_LENGTH;
4003   int consumed_chars = 0, consumed_chars_base;
4004   int multibytep = coding->src_multibyte;
4005   struct charset *charset_roman, *charset_big5;
4006   Lisp_Object attrs, eol_type, charset_list, val;
4007   int char_offset = coding->produced_char;
4008   int last_offset = char_offset;
4009   int last_id = charset_ascii;
4010
4011   CODING_GET_INFO (coding, attrs, eol_type, charset_list);
4012   val = charset_list;
4013   charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
4014   charset_big5 = CHARSET_FROM_ID (XINT (XCAR (val)));
4015
4016   while (1)
4017     {
4018       int c, c1;
4019
4020       src_base = src;
4021       consumed_chars_base = consumed_chars;
4022
4023       if (charbuf >= charbuf_end)
4024         break;
4025
4026       ONE_MORE_BYTE (c);
4027
4028       if (c == '\r')
4029         {
4030           if (EQ (eol_type, Qdos))
4031             {
4032               if (src == src_end)
4033                 {
4034                   coding->result = CODING_RESULT_INSUFFICIENT_SRC;
4035                   goto no_more_source;
4036                 }
4037               if (*src == '\n')
4038                 ONE_MORE_BYTE (c);
4039             }
4040           else if (EQ (eol_type, Qmac))
4041             c = '\n';
4042         }
4043       else
4044         {
4045           struct charset *charset;
4046           if (c < 0x80)
4047             charset = charset_roman;
4048           else
4049             {
4050               /* BIG5 -> Big5 */
4051               if (c < 0xA1 || c > 0xFE)
4052                 goto invalid_code;
4053               ONE_MORE_BYTE (c1);
4054               if (c1 < 0x40 || (c1 > 0x7E && c1 < 0xA1) || c1 > 0xFE)
4055                 goto invalid_code;
4056               c = c << 8 | c1;
4057               charset = charset_big5;
4058             }
4059           if (charset->id != charset_ascii
4060               && last_id != charset->id)
4061             {
4062               if (last_id != charset_ascii)
4063                 ADD_CHARSET_DATA (charbuf, last_offset, char_offset, last_id);
4064               last_id = charset->id;
4065               last_offset = char_offset;
4066             }
4067           CODING_DECODE_CHAR (coding, src, src_base, src_end, charset, c, c);
4068         }
4069
4070       *charbuf++ = c;
4071       char_offset++;
4072       continue;
4073
4074     invalid_code:
4075       src = src_base;
4076       consumed_chars = consumed_chars_base;
4077       ONE_MORE_BYTE (c);
4078       *charbuf++ = ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
4079       char_offset++;
4080       coding->errors++;
4081     }
4082
4083  no_more_source:
4084   if (last_id != charset_ascii)
4085     ADD_CHARSET_DATA (charbuf, last_offset, char_offset, last_id);
4086   coding->consumed_char += consumed_chars_base;
4087   coding->consumed = src_base - coding->source;
4088   coding->charbuf_used = charbuf - coding->charbuf;
4089 }
4090
4091 /* See the above "GENERAL NOTES on `encode_coding_XXX ()' functions".
4092    This function can encode charsets `ascii', `katakana-jisx0201',
4093    `japanese-jisx0208', `chinese-big5-1', and `chinese-big5-2'.  We
4094    are sure that all these charsets are registered as official charset
4095    (i.e. do not have extended leading-codes).  Characters of other
4096    charsets are produced without any encoding.  If SJIS_P is 1, encode
4097    SJIS text, else encode BIG5 text.  */
4098
4099 static int
4100 encode_coding_sjis (coding)
4101      struct coding_system *coding;
4102 {
4103   int multibytep = coding->dst_multibyte;
4104   int *charbuf = coding->charbuf;
4105   int *charbuf_end = charbuf + coding->charbuf_used;
4106   unsigned char *dst = coding->destination + coding->produced;
4107   unsigned char *dst_end = coding->destination + coding->dst_bytes;
4108   int safe_room = 4;
4109   int produced_chars = 0;
4110   Lisp_Object attrs, eol_type, charset_list, val;
4111   int ascii_compatible;
4112   struct charset *charset_roman, *charset_kanji, *charset_kana;
4113   int c;
4114
4115   CODING_GET_INFO (coding, attrs, eol_type, charset_list);
4116   val = charset_list;
4117   charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
4118   charset_kana = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
4119   charset_kanji = CHARSET_FROM_ID (XINT (XCAR (val)));
4120
4121   ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
4122
4123   while (charbuf < charbuf_end)
4124     {
4125       ASSURE_DESTINATION (safe_room);
4126       c = *charbuf++;
4127       /* Now encode the character C.  */
4128       if (ASCII_CHAR_P (c) && ascii_compatible)
4129         EMIT_ONE_ASCII_BYTE (c);
4130       else if (CHAR_BYTE8_P (c))
4131         {
4132           c = CHAR_TO_BYTE8 (c);
4133           EMIT_ONE_BYTE (c);
4134         }
4135       else
4136         {
4137           unsigned code;
4138           struct charset *charset = char_charset (c, charset_list, &code);
4139
4140           if (!charset)
4141             {
4142               if (coding->mode & CODING_MODE_SAFE_ENCODING)
4143                 {
4144                   code = CODING_INHIBIT_CHARACTER_SUBSTITUTION;
4145                   charset = CHARSET_FROM_ID (charset_ascii);
4146                 }
4147               else
4148                 {
4149                   c = coding->default_char;
4150                   charset = char_charset (c, charset_list, &code);
4151                 }
4152             }
4153           if (code == CHARSET_INVALID_CODE (charset))
4154             abort ();
4155           if (charset == charset_kanji)
4156             {
4157               int c1, c2;
4158               JIS_TO_SJIS (code);
4159               c1 = code >> 8, c2 = code & 0xFF;
4160               EMIT_TWO_BYTES (c1, c2);
4161             }
4162           else if (charset == charset_kana)
4163             EMIT_ONE_BYTE (code | 0x80);
4164           else
4165             EMIT_ONE_ASCII_BYTE (code & 0x7F);
4166         }
4167     }
4168   coding->result = CODING_RESULT_SUCCESS;
4169   coding->produced_char += produced_chars;
4170   coding->produced = dst - coding->destination;
4171   return 0;
4172 }
4173
4174 static int
4175 encode_coding_big5 (coding)
4176      struct coding_system *coding;
4177 {
4178   int multibytep = coding->dst_multibyte;
4179   int *charbuf = coding->charbuf;
4180   int *charbuf_end = charbuf + coding->charbuf_used;
4181   unsigned char *dst = coding->destination + coding->produced;
4182   unsigned char *dst_end = coding->destination + coding->dst_bytes;
4183   int safe_room = 4;
4184   int produced_chars = 0;
4185   Lisp_Object attrs, eol_type, charset_list, val;
4186   int ascii_compatible;
4187   struct charset *charset_roman, *charset_big5;
4188   int c;
4189
4190   CODING_GET_INFO (coding, attrs, eol_type, charset_list);
4191   val = charset_list;
4192   charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
4193   charset_big5 = CHARSET_FROM_ID (XINT (XCAR (val)));
4194   ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
4195
4196   while (charbuf < charbuf_end)
4197     {
4198       ASSURE_DESTINATION (safe_room);
4199       c = *charbuf++;
4200       /* Now encode the character C.  */
4201       if (ASCII_CHAR_P (c) && ascii_compatible)
4202         EMIT_ONE_ASCII_BYTE (c);
4203       else if (CHAR_BYTE8_P (c))
4204         {
4205           c = CHAR_TO_BYTE8 (c);
4206           EMIT_ONE_BYTE (c);
4207         }
4208       else
4209         {
4210           unsigned code;
4211           struct charset *charset = char_charset (c, charset_list, &code);
4212
4213           if (! charset)
4214             {
4215               if (coding->mode & CODING_MODE_SAFE_ENCODING)
4216                 {
4217                   code = CODING_INHIBIT_CHARACTER_SUBSTITUTION;
4218                   charset = CHARSET_FROM_ID (charset_ascii);
4219                 }
4220               else
4221                 {
4222                   c = coding->default_char;
4223                   charset = char_charset (c, charset_list, &code);
4224                 }
4225             }
4226           if (code == CHARSET_INVALID_CODE (charset))
4227             abort ();
4228           if (charset == charset_big5)
4229             {
4230               int c1, c2;
4231
4232               c1 = code >> 8, c2 = code & 0xFF;
4233               EMIT_TWO_BYTES (c1, c2);
4234             }
4235           else
4236             EMIT_ONE_ASCII_BYTE (code & 0x7F);
4237         }
4238     }
4239   coding->result = CODING_RESULT_SUCCESS;
4240   coding->produced_char += produced_chars;
4241   coding->produced = dst - coding->destination;
4242   return 0;
4243 }
4244
4245 \f
4246 /*** 10. CCL handlers ***/
4247
4248 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
4249    Check if a text is encoded in a coding system of which
4250    encoder/decoder are written in CCL program.  If it is, return
4251    CATEGORY_MASK_CCL, else return 0.  */
4252
4253 static int
4254 detect_coding_ccl (coding, detect_info)
4255      struct coding_system *coding;
4256      struct coding_detection_info *detect_info;
4257 {
4258   unsigned char *src = coding->source, *src_base = src;
4259   unsigned char *src_end = coding->source + coding->src_bytes;
4260   int multibytep = coding->src_multibyte;
4261   int consumed_chars = 0;
4262   int found = 0;
4263   unsigned char *valids = CODING_CCL_VALIDS (coding);
4264   int head_ascii = coding->head_ascii;
4265   Lisp_Object attrs;
4266
4267   detect_info->checked |= CATEGORY_MASK_CCL;
4268
4269   coding = &coding_categories[coding_category_ccl];
4270   attrs = CODING_ID_ATTRS (coding->id);
4271   if (! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
4272     src += head_ascii;
4273
4274   while (1)
4275     {
4276       int c;
4277       ONE_MORE_BYTE (c);
4278       if (! valids[c])
4279         break;
4280       if ((valids[c] > 1))
4281         found = CATEGORY_MASK_CCL;
4282     }
4283   detect_info->rejected |= CATEGORY_MASK_CCL;
4284   return 0;
4285
4286  no_more_source:
4287   detect_info->found |= found;
4288   return 1;
4289 }
4290
4291 static void
4292 decode_coding_ccl (coding)
4293      struct coding_system *coding;
4294 {
4295   const unsigned char *src = coding->source + coding->consumed;
4296   unsigned char *src_end = coding->source + coding->src_bytes;
4297   int *charbuf = coding->charbuf;
4298   int *charbuf_end = charbuf + coding->charbuf_size;
4299   int consumed_chars = 0;
4300   int multibytep = coding->src_multibyte;
4301   struct ccl_program ccl;
4302   int source_charbuf[1024];
4303   int source_byteidx[1024];
4304   Lisp_Object attrs, eol_type, charset_list, valids;
4305
4306   CODING_GET_INFO (coding, attrs, eol_type, charset_list);
4307   setup_ccl_program (&ccl, CODING_CCL_DECODER (coding));
4308
4309   while (src < src_end)
4310     {
4311       const unsigned char *p = src;
4312       int *source, *source_end;
4313       int i = 0;
4314
4315       if (multibytep)
4316         while (i < 1024 && p < src_end)
4317           {
4318             source_byteidx[i] = p - src;
4319             source_charbuf[i++] = STRING_CHAR_ADVANCE (p);
4320           }
4321       else
4322         while (i < 1024 && p < src_end)
4323           source_charbuf[i++] = *p++;
4324
4325       if (p == src_end && coding->mode & CODING_MODE_LAST_BLOCK)
4326         ccl.last_block = 1;
4327
4328       source = source_charbuf;
4329       source_end = source + i;
4330       while (source < source_end)
4331         {
4332           ccl_driver (&ccl, source, charbuf,
4333                       source_end - source, charbuf_end - charbuf,
4334                       charset_list);
4335           source += ccl.consumed;
4336           charbuf += ccl.produced;
4337           if (ccl.status != CCL_STAT_SUSPEND_BY_DST)
4338             break;
4339         }
4340       if (source < source_end)
4341         src += source_byteidx[source - source_charbuf];
4342       else
4343         src = p;
4344       consumed_chars += source - source_charbuf;
4345
4346       if (ccl.status != CCL_STAT_SUSPEND_BY_SRC
4347           && ccl.status != CODING_RESULT_INSUFFICIENT_SRC)
4348         break;
4349     }
4350
4351   switch (ccl.status)
4352     {
4353     case CCL_STAT_SUSPEND_BY_SRC:
4354       coding->result = CODING_RESULT_INSUFFICIENT_SRC;
4355       break;
4356     case CCL_STAT_SUSPEND_BY_DST:
4357       break;
4358     case CCL_STAT_QUIT:
4359     case CCL_STAT_INVALID_CMD:
4360       coding->result = CODING_RESULT_INTERRUPT;
4361       break;
4362     default:
4363       coding->result = CODING_RESULT_SUCCESS;
4364       break;
4365     }
4366   coding->consumed_char += consumed_chars;
4367   coding->consumed = src - coding->source;
4368   coding->charbuf_used = charbuf - coding->charbuf;
4369 }
4370
4371 static int
4372 encode_coding_ccl (coding)
4373      struct coding_system *coding;
4374 {
4375   struct ccl_program ccl;
4376   int multibytep = coding->dst_multibyte;
4377   int *charbuf = coding->charbuf;
4378   int *charbuf_end = charbuf + coding->charbuf_used;
4379   unsigned char *dst = coding->destination + coding->produced;
4380   unsigned char *dst_end = coding->destination + coding->dst_bytes;
4381   unsigned char *adjusted_dst_end = dst_end - 1;
4382   int destination_charbuf[1024];
4383   int i, produced_chars = 0;
4384   Lisp_Object attrs, eol_type, charset_list;
4385
4386   CODING_GET_INFO (coding, attrs, eol_type, charset_list);
4387   setup_ccl_program (&ccl, CODING_CCL_ENCODER (coding));
4388
4389   ccl.last_block = coding->mode & CODING_MODE_LAST_BLOCK;
4390   ccl.dst_multibyte = coding->dst_multibyte;
4391
4392   while (charbuf < charbuf_end && dst < adjusted_dst_end)
4393     {
4394       int dst_bytes = dst_end - dst;
4395       if (dst_bytes > 1024)
4396         dst_bytes = 1024;
4397
4398       ccl_driver (&ccl, charbuf, destination_charbuf,
4399                   charbuf_end - charbuf, dst_bytes, charset_list);
4400       charbuf += ccl.consumed;
4401       if (multibytep)
4402         for (i = 0; i < ccl.produced; i++)
4403           EMIT_ONE_BYTE (destination_charbuf[i] & 0xFF);
4404       else
4405         {
4406           for (i = 0; i < ccl.produced; i++)
4407             *dst++ = destination_charbuf[i] & 0xFF;
4408           produced_chars += ccl.produced;
4409         }
4410     }
4411
4412   switch (ccl.status)
4413     {
4414     case CCL_STAT_SUSPEND_BY_SRC:
4415       coding->result = CODING_RESULT_INSUFFICIENT_SRC;
4416       break;
4417     case CCL_STAT_SUSPEND_BY_DST:
4418       coding->result = CODING_RESULT_INSUFFICIENT_DST;
4419       break;
4420     case CCL_STAT_QUIT:
4421     case CCL_STAT_INVALID_CMD:
4422       coding->result = CODING_RESULT_INTERRUPT;
4423       break;
4424     default:
4425       coding->result = CODING_RESULT_SUCCESS;
4426       break;
4427     }
4428
4429   coding->produced_char += produced_chars;
4430   coding->produced = dst - coding->destination;
4431   return 0;
4432 }
4433
4434
4435 \f
4436 /*** 10, 11. no-conversion handlers ***/
4437
4438 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".  */
4439
4440 static void
4441 decode_coding_raw_text (coding)
4442      struct coding_system *coding;
4443 {
4444   coding->chars_at_source = 1;
4445   coding->consumed_char = 0;
4446   coding->consumed = 0;
4447   coding->result = CODING_RESULT_SUCCESS;
4448 }
4449
4450 static int
4451 encode_coding_raw_text (coding)
4452      struct coding_system *coding;
4453 {
4454   int multibytep = coding->dst_multibyte;
4455   int *charbuf = coding->charbuf;
4456   int *charbuf_end = coding->charbuf + coding->charbuf_used;
4457   unsigned char *dst = coding->destination + coding->produced;
4458   unsigned char *dst_end = coding->destination + coding->dst_bytes;
4459   int produced_chars = 0;
4460   int c;
4461
4462   if (multibytep)
4463     {
4464       int safe_room = MAX_MULTIBYTE_LENGTH * 2;
4465
4466       if (coding->src_multibyte)
4467         while (charbuf < charbuf_end)
4468           {
4469             ASSURE_DESTINATION (safe_room);
4470             c = *charbuf++;
4471             if (ASCII_CHAR_P (c))
4472               EMIT_ONE_ASCII_BYTE (c);
4473             else if (CHAR_BYTE8_P (c))
4474               {
4475                 c = CHAR_TO_BYTE8 (c);
4476                 EMIT_ONE_BYTE (c);
4477               }
4478             else
4479               {
4480                 unsigned char str[MAX_MULTIBYTE_LENGTH], *p0 = str, *p1 = str;
4481
4482                 CHAR_STRING_ADVANCE (c, p1);
4483                 while (p0 < p1)
4484                   {
4485                     EMIT_ONE_BYTE (*p0);
4486                     p0++;
4487                   }
4488               }
4489           }
4490       else
4491         while (charbuf < charbuf_end)
4492           {
4493             ASSURE_DESTINATION (safe_room);
4494             c = *charbuf++;
4495             EMIT_ONE_BYTE (c);
4496           }
4497     }
4498   else
4499     {
4500       if (coding->src_multibyte)
4501         {
4502           int safe_room = MAX_MULTIBYTE_LENGTH;
4503
4504           while (charbuf < charbuf_end)
4505             {
4506               ASSURE_DESTINATION (safe_room);
4507               c = *charbuf++;
4508               if (ASCII_CHAR_P (c))
4509                 *dst++ = c;
4510               else if (CHAR_BYTE8_P (c))
4511                 *dst++ = CHAR_TO_BYTE8 (c);
4512               else
4513                 CHAR_STRING_ADVANCE (c, dst);
4514               produced_chars++;
4515             }
4516         }
4517       else
4518         {
4519           ASSURE_DESTINATION (charbuf_end - charbuf);
4520           while (charbuf < charbuf_end && dst < dst_end)
4521             *dst++ = *charbuf++;
4522           produced_chars = dst - (coding->destination + coding->dst_bytes);
4523         }
4524     }
4525   coding->result = CODING_RESULT_SUCCESS;
4526   coding->produced_char += produced_chars;
4527   coding->produced = dst - coding->destination;
4528   return 0;
4529 }
4530
4531 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
4532    Check if a text is encoded in a charset-based coding system.  If it
4533    is, return 1, else return 0.  */
4534
4535 static int
4536 detect_coding_charset (coding, detect_info)
4537      struct coding_system *coding;
4538      struct coding_detection_info *detect_info;
4539 {
4540   unsigned char *src = coding->source, *src_base = src;
4541   unsigned char *src_end = coding->source + coding->src_bytes;
4542   int multibytep = coding->src_multibyte;
4543   int consumed_chars = 0;
4544   Lisp_Object attrs, valids;
4545   int found = 0;
4546
4547   detect_info->checked |= CATEGORY_MASK_CHARSET;
4548
4549   coding = &coding_categories[coding_category_charset];
4550   attrs = CODING_ID_ATTRS (coding->id);
4551   valids = AREF (attrs, coding_attr_charset_valids);
4552
4553   if (! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
4554     src += coding->head_ascii;
4555
4556   while (1)
4557     {
4558       int c;
4559
4560       ONE_MORE_BYTE (c);
4561       if (NILP (AREF (valids, c)))
4562         break;
4563       if (c >= 0x80)
4564         found = CATEGORY_MASK_CHARSET;
4565     }
4566   detect_info->rejected |= CATEGORY_MASK_CHARSET;
4567   return 0;
4568
4569  no_more_source:
4570   detect_info->found |= found;
4571   return 1;
4572 }
4573
4574 static void
4575 decode_coding_charset (coding)
4576      struct coding_system *coding;
4577 {
4578   unsigned char *src = coding->source + coding->consumed;
4579   unsigned char *src_end = coding->source + coding->src_bytes;
4580   unsigned char *src_base;
4581   int *charbuf = coding->charbuf;
4582   int *charbuf_end = charbuf + coding->charbuf_size - MAX_ANNOTATION_LENGTH;
4583   int consumed_chars = 0, consumed_chars_base;
4584   int multibytep = coding->src_multibyte;
4585   Lisp_Object attrs, eol_type, charset_list, valids;
4586   int char_offset = coding->produced_char;
4587   int last_offset = char_offset;
4588   int last_id = charset_ascii;
4589
4590   CODING_GET_INFO (coding, attrs, eol_type, charset_list);
4591   valids = AREF (attrs, coding_attr_charset_valids);
4592
4593   while (1)
4594     {
4595       int c;
4596
4597       src_base = src;
4598       consumed_chars_base = consumed_chars;
4599
4600       if (charbuf >= charbuf_end)
4601         break;
4602
4603       ONE_MORE_BYTE (c);
4604       if (c == '\r')
4605         {
4606           /* Here we assume that no charset maps '\r' to something
4607              else.  */
4608           if (EQ (eol_type, Qdos))
4609             {
4610               if (src == src_end)
4611                 {
4612                   coding->result = CODING_RESULT_INSUFFICIENT_SRC;
4613                   goto no_more_source;
4614                 }
4615               if (*src == '\n')
4616                 ONE_MORE_BYTE (c);
4617             }
4618           else if (EQ (eol_type, Qmac))
4619             c = '\n';
4620         }
4621       else
4622         {
4623           Lisp_Object val;
4624           struct charset *charset;
4625           int dim;
4626           int len = 1;
4627           unsigned code = c;
4628
4629           val = AREF (valids, c);
4630           if (NILP (val))
4631             goto invalid_code;
4632           if (INTEGERP (val))
4633             {
4634               charset = CHARSET_FROM_ID (XFASTINT (val));
4635               dim = CHARSET_DIMENSION (charset);
4636               while (len < dim)
4637                 {
4638                   ONE_MORE_BYTE (c);
4639                   code = (code << 8) | c;
4640                   len++;
4641                 }
4642               CODING_DECODE_CHAR (coding, src, src_base, src_end,
4643                                   charset, code, c);
4644             }
4645           else
4646             {
4647               /* VAL is a list of charset IDs.  It is assured that the
4648                  list is sorted by charset dimensions (smaller one
4649                  comes first).  */
4650               while (CONSP (val))
4651                 {
4652                   charset = CHARSET_FROM_ID (XFASTINT (XCAR (val)));
4653                   dim = CHARSET_DIMENSION (charset);
4654                   while (len < dim)
4655                     {
4656                       ONE_MORE_BYTE (c);
4657                       code = (code << 8) | c;
4658                       len++;
4659                     }
4660                   CODING_DECODE_CHAR (coding, src, src_base,
4661                                       src_end, charset, code, c);
4662                   if (c >= 0)
4663                     break;
4664                   val = XCDR (val);
4665                 }
4666             }
4667           if (c < 0)
4668             goto invalid_code;
4669           if (charset->id != charset_ascii
4670               && last_id != charset->id)
4671             {
4672               if (last_id != charset_ascii)
4673                 ADD_CHARSET_DATA (charbuf, last_offset, char_offset, last_id);
4674               last_id = charset->id;
4675               last_offset = char_offset;
4676             }
4677         }
4678       *charbuf++ = c;
4679       char_offset++;
4680       continue;
4681
4682     invalid_code:
4683       src = src_base;
4684       consumed_chars = consumed_chars_base;
4685       ONE_MORE_BYTE (c);
4686       *charbuf++ = ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
4687       char_offset++;
4688       coding->errors++;
4689     }
4690
4691  no_more_source:
4692   if (last_id != charset_ascii)
4693     ADD_CHARSET_DATA (charbuf, last_offset, char_offset, last_id);
4694   coding->consumed_char += consumed_chars_base;
4695   coding->consumed = src_base - coding->source;
4696   coding->charbuf_used = charbuf - coding->charbuf;
4697 }
4698
4699 static int
4700 encode_coding_charset (coding)
4701      struct coding_system *coding;
4702 {
4703   int multibytep = coding->dst_multibyte;
4704   int *charbuf = coding->charbuf;
4705   int *charbuf_end = charbuf + coding->charbuf_used;
4706   unsigned char *dst = coding->destination + coding->produced;
4707   unsigned char *dst_end = coding->destination + coding->dst_bytes;
4708   int safe_room = MAX_MULTIBYTE_LENGTH;
4709   int produced_chars = 0;
4710   Lisp_Object attrs, eol_type, charset_list;
4711   int ascii_compatible;
4712   int c;
4713
4714   CODING_GET_INFO (coding, attrs, eol_type, charset_list);
4715   ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
4716
4717   while (charbuf < charbuf_end)
4718     {
4719       struct charset *charset;
4720       unsigned code;
4721
4722       ASSURE_DESTINATION (safe_room);
4723       c = *charbuf++;
4724       if (ascii_compatible && ASCII_CHAR_P (c))
4725         EMIT_ONE_ASCII_BYTE (c);
4726       else if (CHAR_BYTE8_P (c))
4727         {
4728           c = CHAR_TO_BYTE8 (c);
4729           EMIT_ONE_BYTE (c);
4730         }
4731       else
4732         {
4733           charset = char_charset (c, charset_list, &code);
4734           if (charset)
4735             {
4736               if (CHARSET_DIMENSION (charset) == 1)
4737                 EMIT_ONE_BYTE (code);
4738               else if (CHARSET_DIMENSION (charset) == 2)
4739                 EMIT_TWO_BYTES (code >> 8, code & 0xFF);
4740               else if (CHARSET_DIMENSION (charset) == 3)
4741                 EMIT_THREE_BYTES (code >> 16, (code >> 8) & 0xFF, code & 0xFF);
4742               else
4743                 EMIT_FOUR_BYTES (code >> 24, (code >> 16) & 0xFF,
4744                                  (code >> 8) & 0xFF, code & 0xFF);
4745             }
4746           else
4747             {
4748               if (coding->mode & CODING_MODE_SAFE_ENCODING)
4749                 c = CODING_INHIBIT_CHARACTER_SUBSTITUTION;
4750               else
4751                 c = coding->default_char;
4752               EMIT_ONE_BYTE (c);
4753             }
4754         }
4755     }
4756
4757   coding->result = CODING_RESULT_SUCCESS;
4758   coding->produced_char += produced_chars;
4759   coding->produced = dst - coding->destination;
4760   return 0;
4761 }
4762
4763 \f
4764 /*** 7. C library functions ***/
4765
4766 /* Setup coding context CODING from information about CODING_SYSTEM.
4767    If CODING_SYSTEM is nil, `no-conversion' is assumed.  If
4768    CODING_SYSTEM is invalid, signal an error.  */
4769
4770 void
4771 setup_coding_system (coding_system, coding)
4772      Lisp_Object coding_system;
4773      struct coding_system *coding;
4774 {
4775   Lisp_Object attrs;
4776   Lisp_Object eol_type;
4777   Lisp_Object coding_type;
4778   Lisp_Object val;
4779
4780   if (NILP (coding_system))
4781     coding_system = Qno_conversion;
4782
4783   CHECK_CODING_SYSTEM_GET_ID (coding_system, coding->id);
4784
4785   attrs = CODING_ID_ATTRS (coding->id);
4786   eol_type = CODING_ID_EOL_TYPE (coding->id);
4787
4788   coding->mode = 0;
4789   coding->head_ascii = -1;
4790   coding->common_flags
4791     = (VECTORP (eol_type) ? CODING_REQUIRE_DETECTION_MASK : 0);
4792
4793   val = CODING_ATTR_SAFE_CHARSETS (attrs);
4794   coding->max_charset_id = XSTRING (val)->size - 1;
4795   coding->safe_charsets = (char *) XSTRING (val)->data;
4796   coding->default_char = XINT (CODING_ATTR_DEFAULT_CHAR (attrs));
4797
4798   coding_type = CODING_ATTR_TYPE (attrs);
4799   if (EQ (coding_type, Qundecided))
4800     {
4801       coding->detector = NULL;
4802       coding->decoder = decode_coding_raw_text;
4803       coding->encoder = encode_coding_raw_text;
4804       coding->common_flags |= CODING_REQUIRE_DETECTION_MASK;
4805     }
4806   else if (EQ (coding_type, Qiso_2022))
4807     {
4808       int i;
4809       int flags = XINT (AREF (attrs, coding_attr_iso_flags));
4810       enum coding_category category =  XINT (CODING_ATTR_CATEGORY (attrs));
4811
4812       /* Invoke graphic register 0 to plane 0.  */
4813       CODING_ISO_INVOCATION (coding, 0) = 0;
4814       /* Invoke graphic register 1 to plane 1 if we can use 8-bit.  */
4815       CODING_ISO_INVOCATION (coding, 1)
4816         = (flags & CODING_ISO_FLAG_SEVEN_BITS ? -1 : 1);
4817       /* Setup the initial status of designation.  */
4818       for (i = 0; i < 4; i++)
4819         CODING_ISO_DESIGNATION (coding, i) = CODING_ISO_INITIAL (coding, i);
4820       /* Not single shifting initially.  */
4821       CODING_ISO_SINGLE_SHIFTING (coding) = 0;
4822       /* Beginning of buffer should also be regarded as bol. */
4823       CODING_ISO_BOL (coding) = 1;
4824       coding->detector = detect_coding_iso_2022;
4825       coding->decoder = decode_coding_iso_2022;
4826       coding->encoder = encode_coding_iso_2022;
4827       if (flags & CODING_ISO_FLAG_SAFE)
4828         coding->mode |= CODING_MODE_SAFE_ENCODING;
4829       coding->common_flags
4830         |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK
4831             | CODING_REQUIRE_FLUSHING_MASK);
4832       if (flags & CODING_ISO_FLAG_COMPOSITION)
4833         coding->common_flags |= CODING_ANNOTATE_COMPOSITION_MASK;
4834       if (flags & CODING_ISO_FLAG_DESIGNATION)
4835         coding->common_flags |= CODING_ANNOTATE_CHARSET_MASK;
4836       if (flags & CODING_ISO_FLAG_FULL_SUPPORT)
4837         {
4838           setup_iso_safe_charsets (attrs);
4839           val = CODING_ATTR_SAFE_CHARSETS (attrs);
4840           coding->max_charset_id = XSTRING (val)->size - 1;
4841           coding->safe_charsets = (char *) XSTRING (val)->data;
4842         }
4843       CODING_ISO_FLAGS (coding) = flags;
4844     }
4845   else if (EQ (coding_type, Qcharset))
4846     {
4847       coding->detector = detect_coding_charset;
4848       coding->decoder = decode_coding_charset;
4849       coding->encoder = encode_coding_charset;
4850       coding->common_flags
4851         |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
4852     }
4853   else if (EQ (coding_type, Qutf_8))
4854     {
4855       coding->detector = detect_coding_utf_8;
4856       coding->decoder = decode_coding_utf_8;
4857       coding->encoder = encode_coding_utf_8;
4858       coding->common_flags
4859         |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
4860     }
4861   else if (EQ (coding_type, Qutf_16))
4862     {
4863       val = AREF (attrs, coding_attr_utf_16_bom);
4864       CODING_UTF_16_BOM (coding) = (CONSP (val) ? utf_16_detect_bom
4865                                     : EQ (val, Qt) ? utf_16_with_bom
4866                                     : utf_16_without_bom);
4867       val = AREF (attrs, coding_attr_utf_16_endian);
4868       CODING_UTF_16_ENDIAN (coding) = (NILP (val) ? utf_16_big_endian
4869                                        : utf_16_little_endian);
4870       CODING_UTF_16_SURROGATE (coding) = 0;
4871       coding->detector = detect_coding_utf_16;
4872       coding->decoder = decode_coding_utf_16;
4873       coding->encoder = encode_coding_utf_16;
4874       coding->common_flags
4875         |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
4876     }
4877   else if (EQ (coding_type, Qccl))
4878     {
4879       coding->detector = detect_coding_ccl;
4880       coding->decoder = decode_coding_ccl;
4881       coding->encoder = encode_coding_ccl;
4882       coding->common_flags
4883         |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK
4884             | CODING_REQUIRE_FLUSHING_MASK);
4885     }
4886   else if (EQ (coding_type, Qemacs_mule))
4887     {
4888       coding->detector = detect_coding_emacs_mule;
4889       coding->decoder = decode_coding_emacs_mule;
4890       coding->encoder = encode_coding_emacs_mule;
4891       coding->common_flags
4892         |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
4893       if (! NILP (AREF (attrs, coding_attr_emacs_mule_full))
4894           && ! EQ (CODING_ATTR_CHARSET_LIST (attrs), Vemacs_mule_charset_list))
4895         {
4896           Lisp_Object tail, safe_charsets;
4897           int max_charset_id = 0;
4898
4899           for (tail = Vemacs_mule_charset_list; CONSP (tail);
4900                tail = XCDR (tail))
4901             if (max_charset_id < XFASTINT (XCAR (tail)))
4902               max_charset_id = XFASTINT (XCAR (tail));
4903           safe_charsets = Fmake_string (make_number (max_charset_id + 1),
4904                                         make_number (255));
4905           for (tail = Vemacs_mule_charset_list; CONSP (tail);
4906                tail = XCDR (tail))
4907             XSTRING (safe_charsets)->data[XFASTINT (XCAR (tail))] = 0;
4908           coding->max_charset_id = max_charset_id;
4909           coding->safe_charsets = (char *) XSTRING (safe_charsets)->data;
4910         }
4911     }
4912   else if (EQ (coding_type, Qshift_jis))
4913     {
4914       coding->detector = detect_coding_sjis;
4915       coding->decoder = decode_coding_sjis;
4916       coding->encoder = encode_coding_sjis;
4917       coding->common_flags
4918         |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
4919     }
4920   else if (EQ (coding_type, Qbig5))
4921     {
4922       coding->detector = detect_coding_big5;
4923       coding->decoder = decode_coding_big5;
4924       coding->encoder = encode_coding_big5;
4925       coding->common_flags
4926         |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
4927     }
4928   else                          /* EQ (coding_type, Qraw_text) */
4929     {
4930       coding->detector = NULL;
4931       coding->decoder = decode_coding_raw_text;
4932       coding->encoder = encode_coding_raw_text;
4933       coding->common_flags |= CODING_FOR_UNIBYTE_MASK;
4934     }
4935
4936   return;
4937 }
4938
4939 /* Return raw-text or one of its subsidiaries that has the same
4940    eol_type as CODING-SYSTEM.  */
4941
4942 Lisp_Object
4943 raw_text_coding_system (coding_system)
4944      Lisp_Object coding_system;
4945 {
4946   Lisp_Object spec, attrs;
4947   Lisp_Object eol_type, raw_text_eol_type;
4948
4949   spec = CODING_SYSTEM_SPEC (coding_system);
4950   attrs = AREF (spec, 0);
4951
4952   if (EQ (CODING_ATTR_TYPE (attrs), Qraw_text))
4953     return coding_system;
4954
4955   eol_type = AREF (spec, 2);
4956   if (VECTORP (eol_type))
4957     return Qraw_text;
4958   spec = CODING_SYSTEM_SPEC (Qraw_text);
4959   raw_text_eol_type = AREF (spec, 2);
4960   return (EQ (eol_type, Qunix) ? AREF (raw_text_eol_type, 0)
4961           : EQ (eol_type, Qdos) ? AREF (raw_text_eol_type, 1)
4962           : AREF (raw_text_eol_type, 2));
4963 }
4964
4965
4966 /* If CODING_SYSTEM doesn't specify end-of-line format but PARENT
4967    does, return one of the subsidiary that has the same eol-spec as
4968    PARENT.  Otherwise, return CODING_SYSTEM.  */
4969
4970 Lisp_Object
4971 coding_inherit_eol_type (coding_system, parent)
4972      Lisp_Object coding_system, parent;
4973 {
4974   Lisp_Object spec, attrs, eol_type;
4975
4976   spec = CODING_SYSTEM_SPEC (coding_system);
4977   attrs = AREF (spec, 0);
4978   eol_type = AREF (spec, 2);
4979   if (VECTORP (eol_type))
4980     {
4981       Lisp_Object parent_spec;
4982       Lisp_Object parent_eol_type;
4983
4984       parent_spec
4985         = CODING_SYSTEM_SPEC (buffer_defaults.buffer_file_coding_system);
4986       parent_eol_type = AREF (parent_spec, 2);
4987       if (EQ (parent_eol_type, Qunix))
4988         coding_system = AREF (eol_type, 0);
4989       else if (EQ (parent_eol_type, Qdos))
4990         coding_system = AREF (eol_type, 1);
4991       else if (EQ (parent_eol_type, Qmac))
4992         coding_system = AREF (eol_type, 2);
4993     }
4994   return coding_system;
4995 }
4996
4997 /* Emacs has a mechanism to automatically detect a coding system if it
4998    is one of Emacs' internal format, ISO2022, SJIS, and BIG5.  But,
4999    it's impossible to distinguish some coding systems accurately
5000    because they use the same range of codes.  So, at first, coding
5001    systems are categorized into 7, those are:
5002
5003    o coding-category-emacs-mule
5004
5005         The category for a coding system which has the same code range
5006         as Emacs' internal format.  Assigned the coding-system (Lisp
5007         symbol) `emacs-mule' by default.
5008
5009    o coding-category-sjis
5010
5011         The category for a coding system which has the same code range
5012         as SJIS.  Assigned the coding-system (Lisp
5013         symbol) `japanese-shift-jis' by default.
5014
5015    o coding-category-iso-7
5016
5017         The category for a coding system which has the same code range
5018         as ISO2022 of 7-bit environment.  This doesn't use any locking
5019         shift and single shift functions.  This can encode/decode all
5020         charsets.  Assigned the coding-system (Lisp symbol)
5021         `iso-2022-7bit' by default.
5022
5023    o coding-category-iso-7-tight
5024
5025         Same as coding-category-iso-7 except that this can
5026         encode/decode only the specified charsets.
5027
5028    o coding-category-iso-8-1
5029
5030         The category for a coding system which has the same code range
5031         as ISO2022 of 8-bit environment and graphic plane 1 used only
5032         for DIMENSION1 charset.  This doesn't use any locking shift
5033         and single shift functions.  Assigned the coding-system (Lisp
5034         symbol) `iso-latin-1' by default.
5035
5036    o coding-category-iso-8-2
5037
5038         The category for a coding system which has the same code range
5039         as ISO2022 of 8-bit environment and graphic plane 1 used only
5040         for DIMENSION2 charset.  This doesn't use any locking shift
5041         and single shift functions.  Assigned the coding-system (Lisp
5042         symbol) `japanese-iso-8bit' by default.
5043
5044    o coding-category-iso-7-else
5045
5046         The category for a coding system which has the same code range
5047         as ISO2022 of 7-bit environemnt but uses locking shift or
5048         single shift functions.  Assigned the coding-system (Lisp
5049         symbol) `iso-2022-7bit-lock' by default.
5050
5051    o coding-category-iso-8-else
5052
5053         The category for a coding system which has the same code range
5054         as ISO2022 of 8-bit environemnt but uses locking shift or
5055         single shift functions.  Assigned the coding-system (Lisp
5056         symbol) `iso-2022-8bit-ss2' by default.
5057
5058    o coding-category-big5
5059
5060         The category for a coding system which has the same code range
5061         as BIG5.  Assigned the coding-system (Lisp symbol)
5062         `cn-big5' by default.
5063
5064    o coding-category-utf-8
5065
5066         The category for a coding system which has the same code range
5067         as UTF-8 (cf. RFC2279).  Assigned the coding-system (Lisp
5068         symbol) `utf-8' by default.
5069
5070    o coding-category-utf-16-be
5071
5072         The category for a coding system in which a text has an
5073         Unicode signature (cf. Unicode Standard) in the order of BIG
5074         endian at the head.  Assigned the coding-system (Lisp symbol)
5075         `utf-16-be' by default.
5076
5077    o coding-category-utf-16-le
5078
5079         The category for a coding system in which a text has an
5080         Unicode signature (cf. Unicode Standard) in the order of
5081         LITTLE endian at the head.  Assigned the coding-system (Lisp
5082         symbol) `utf-16-le' by default.
5083
5084    o coding-category-ccl
5085
5086         The category for a coding system of which encoder/decoder is
5087         written in CCL programs.  The default value is nil, i.e., no
5088         coding system is assigned.
5089
5090    o coding-category-binary
5091
5092         The category for a coding system not categorized in any of the
5093         above.  Assigned the coding-system (Lisp symbol)
5094         `no-conversion' by default.
5095
5096    Each of them is a Lisp symbol and the value is an actual
5097    `coding-system's (this is also a Lisp symbol) assigned by a user.
5098    What Emacs does actually is to detect a category of coding system.
5099    Then, it uses a `coding-system' assigned to it.  If Emacs can't
5100    decide only one possible category, it selects a category of the
5101    highest priority.  Priorities of categories are also specified by a
5102    user in a Lisp variable `coding-category-list'.
5103
5104 */
5105
5106 #define EOL_SEEN_NONE   0
5107 #define EOL_SEEN_LF     1
5108 #define EOL_SEEN_CR     2
5109 #define EOL_SEEN_CRLF   4
5110
5111 /* Detect how end-of-line of a text of length SRC_BYTES pointed by
5112    SOURCE is encoded.  If CATEGORY is one of
5113    coding_category_utf_16_XXXX, assume that CR and LF are encoded by
5114    two-byte, else they are encoded by one-byte.
5115
5116    Return one of EOL_SEEN_XXX.  */
5117
5118 #define MAX_EOL_CHECK_COUNT 3
5119
5120 static int
5121 detect_eol (source, src_bytes, category)
5122      unsigned char *source;
5123      EMACS_INT src_bytes;
5124      enum coding_category category;
5125 {
5126   unsigned char *src = source, *src_end = src + src_bytes;
5127   unsigned char c;
5128   int total  = 0;
5129   int eol_seen = EOL_SEEN_NONE;
5130
5131   if ((1 << category) & CATEGORY_MASK_UTF_16)
5132     {
5133       int msb, lsb;
5134
5135       msb = category == (coding_category_utf_16_le
5136                          | coding_category_utf_16_le_nosig);
5137       lsb = 1 - msb;
5138
5139       while (src + 1 < src_end)
5140         {
5141           c = src[lsb];
5142           if (src[msb] == 0 && (c == '\n' || c == '\r'))
5143             {
5144               int this_eol;
5145
5146               if (c == '\n')
5147                 this_eol = EOL_SEEN_LF;
5148               else if (src + 3 >= src_end
5149                        || src[msb + 2] != 0
5150                        || src[lsb + 2] != '\n')
5151                 this_eol = EOL_SEEN_CR;
5152               else
5153                 this_eol = EOL_SEEN_CRLF;
5154
5155               if (eol_seen == EOL_SEEN_NONE)
5156                 /* This is the first end-of-line.  */
5157                 eol_seen = this_eol;
5158               else if (eol_seen != this_eol)
5159                 {
5160                   /* The found type is different from what found before.  */
5161                   eol_seen = EOL_SEEN_LF;
5162                   break;
5163                 }
5164               if (++total == MAX_EOL_CHECK_COUNT)
5165                 break;
5166             }
5167           src += 2;
5168         }
5169     }
5170   else
5171     {
5172       while (src < src_end)
5173         {
5174           c = *src++;
5175           if (c == '\n' || c == '\r')
5176             {
5177               int this_eol;
5178
5179               if (c == '\n')
5180                 this_eol = EOL_SEEN_LF;
5181               else if (src >= src_end || *src != '\n')
5182                 this_eol = EOL_SEEN_CR;
5183               else
5184                 this_eol = EOL_SEEN_CRLF, src++;
5185
5186               if (eol_seen == EOL_SEEN_NONE)
5187                 /* This is the first end-of-line.  */
5188                 eol_seen = this_eol;
5189               else if (eol_seen != this_eol)
5190                 {
5191                   /* The found type is different from what found before.  */
5192                   eol_seen = EOL_SEEN_LF;
5193                   break;
5194                 }
5195               if (++total == MAX_EOL_CHECK_COUNT)
5196                 break;
5197             }
5198         }
5199     }
5200   return eol_seen;
5201 }
5202
5203
5204 static void
5205 adjust_coding_eol_type (coding, eol_seen)
5206      struct coding_system *coding;
5207      int eol_seen;
5208 {
5209   Lisp_Object eol_type;
5210
5211   eol_type = CODING_ID_EOL_TYPE (coding->id);
5212   if (eol_seen & EOL_SEEN_LF)
5213     coding->id = CODING_SYSTEM_ID (AREF (eol_type, 0));
5214   else if (eol_seen & EOL_SEEN_CRLF)
5215     coding->id = CODING_SYSTEM_ID (AREF (eol_type, 1));
5216   else if (eol_seen & EOL_SEEN_CR)
5217     coding->id = CODING_SYSTEM_ID (AREF (eol_type, 2));
5218 }
5219
5220 /* Detect how a text specified in CODING is encoded.  If a coding
5221    system is detected, update fields of CODING by the detected coding
5222    system.  */
5223
5224 void
5225 detect_coding (coding)
5226      struct coding_system *coding;
5227 {
5228   unsigned char *src, *src_end;
5229   Lisp_Object attrs, coding_type;
5230
5231   coding->consumed = coding->consumed_char = 0;
5232   coding->produced = coding->produced_char = 0;
5233   coding_set_source (coding);
5234
5235   src_end = coding->source + coding->src_bytes;
5236
5237   /* If we have not yet decided the text encoding type, detect it
5238      now.  */
5239   if (EQ (CODING_ATTR_TYPE (CODING_ID_ATTRS (coding->id)), Qundecided))
5240     {
5241       int c, i;
5242
5243       for (src = coding->source; src < src_end; src++)
5244         {
5245           c = *src;
5246           if (c & 0x80 || (c < 0x20 && (c == ISO_CODE_ESC
5247                                         || c == ISO_CODE_SI
5248                                         || c == ISO_CODE_SO)))
5249             break;
5250         }
5251       coding->head_ascii = src - (coding->source + coding->consumed);
5252
5253       if (coding->head_ascii < coding->src_bytes)
5254         {
5255           struct coding_detection_info detect_info;
5256           enum coding_category category;
5257           struct coding_system *this;
5258
5259           detect_info.checked = detect_info.found = detect_info.rejected = 0;
5260           for (i = 0; i < coding_category_raw_text; i++)
5261             {
5262               category = coding_priorities[i];
5263               this = coding_categories + category;
5264               if (this->id < 0)
5265                 {
5266                   /* No coding system of this category is defined.  */
5267                   detect_info.rejected |= (1 << category);
5268                 }
5269               else if (category >= coding_category_raw_text)
5270                 continue;
5271               else if (detect_info.checked & (1 << category))
5272                 {
5273                   if (detect_info.found & (1 << category))
5274                     break;
5275                 }
5276               else if ((*(this->detector)) (coding, &detect_info)
5277                        && detect_info.found & (1 << category))
5278                 break;
5279             }
5280           if (i < coding_category_raw_text)
5281             setup_coding_system (CODING_ID_NAME (this->id), coding);
5282           else if (detect_info.rejected == CATEGORY_MASK_ANY)
5283             setup_coding_system (Qraw_text, coding);
5284           else if (detect_info.rejected)
5285             for (i = 0; i < coding_category_raw_text; i++)
5286               if (! (detect_info.rejected & (1 << coding_priorities[i])))
5287                 {
5288                   this = coding_categories + coding_priorities[i];
5289                   setup_coding_system (CODING_ID_NAME (this->id), coding);
5290                   break;
5291                 }
5292         }
5293     }
5294
5295   attrs = CODING_ID_ATTRS (coding->id);
5296   coding_type = CODING_ATTR_TYPE (attrs);
5297
5298   /* If we have not yet decided the EOL type, detect it now.  But, the
5299      detection is impossible for a CCL based coding system, in which
5300      case, we detct the EOL type after decoding.  */
5301   if (VECTORP (CODING_ID_EOL_TYPE (coding->id))
5302       && ! EQ (coding_type, Qccl))
5303     {
5304       int eol_seen = detect_eol (coding->source, coding->src_bytes,
5305                                  XINT (CODING_ATTR_CATEGORY (attrs)));
5306
5307       if (eol_seen != EOL_SEEN_NONE)
5308         adjust_coding_eol_type (coding, eol_seen);
5309     }
5310 }
5311
5312
5313 static void
5314 decode_eol (coding)
5315      struct coding_system *coding;
5316 {
5317   if (VECTORP (CODING_ID_EOL_TYPE (coding->id)))
5318     {
5319       unsigned char *p = CHAR_POS_ADDR (coding->dst_pos);
5320       unsigned char *pend = p + coding->produced;
5321       int eol_seen = EOL_SEEN_NONE;
5322
5323       for (; p < pend; p++)
5324         {
5325           if (*p == '\n')
5326             eol_seen |= EOL_SEEN_LF;
5327           else if (*p == '\r')
5328             {
5329               if (p + 1 < pend && *(p + 1) == '\n')
5330                 {
5331                   eol_seen |= EOL_SEEN_CRLF;
5332                   p++;
5333                 }
5334               else
5335                 eol_seen |= EOL_SEEN_CR;
5336             }
5337         }
5338       if (eol_seen != EOL_SEEN_NONE)
5339         adjust_coding_eol_type (coding, eol_seen);
5340     }
5341
5342   if (EQ (CODING_ID_EOL_TYPE (coding->id), Qmac))
5343     {
5344       unsigned char *p = CHAR_POS_ADDR (coding->dst_pos);
5345       unsigned char *pend = p + coding->produced;
5346
5347       for (; p < pend; p++)
5348         if (*p == '\r')
5349           *p = '\n';
5350     }
5351   else if (EQ (CODING_ID_EOL_TYPE (coding->id), Qdos))
5352     {
5353       unsigned char *p, *pbeg, *pend;
5354       Lisp_Object undo_list;
5355
5356       move_gap_both (coding->dst_pos + coding->produced_char,
5357                      coding->dst_pos_byte + coding->produced);
5358       undo_list = current_buffer->undo_list;
5359       current_buffer->undo_list = Qt;
5360       del_range_2 (coding->dst_pos, coding->dst_pos_byte, GPT, GPT_BYTE, 0);
5361       current_buffer->undo_list = undo_list;
5362       pbeg = GPT_ADDR;
5363       pend = pbeg + coding->produced;
5364
5365       for (p = pend - 1; p >= pbeg; p--)
5366         if (*p == '\r')
5367           {
5368             safe_bcopy ((char *) (p + 1), (char *) p, pend - p - 1);
5369             pend--;
5370           }
5371       coding->produced_char -= coding->produced - (pend - pbeg);
5372       coding->produced = pend - pbeg;
5373       insert_from_gap (coding->produced_char, coding->produced);
5374     }
5375 }
5376
5377 static void
5378 translate_chars (coding, table)
5379      struct coding_system *coding;
5380      Lisp_Object table;
5381 {
5382   int *charbuf = coding->charbuf;
5383   int *charbuf_end = charbuf + coding->charbuf_used;
5384   int c;
5385
5386   if (coding->chars_at_source)
5387     return;
5388
5389   while (charbuf < charbuf_end)
5390     {
5391       c = *charbuf;
5392       if (c < 0)
5393         charbuf += c;
5394       else
5395         *charbuf++ = translate_char (table, c);
5396     }
5397 }
5398
5399 static int
5400 produce_chars (coding)
5401      struct coding_system *coding;
5402 {
5403   unsigned char *dst = coding->destination + coding->produced;
5404   unsigned char *dst_end = coding->destination + coding->dst_bytes;
5405   int produced;
5406   int produced_chars = 0;
5407
5408   if (! coding->chars_at_source)
5409     {
5410       /* Characters are in coding->charbuf.  */
5411       int *buf = coding->charbuf;
5412       int *buf_end = buf + coding->charbuf_used;
5413       unsigned char *adjusted_dst_end;
5414
5415       if (BUFFERP (coding->src_object)
5416           && EQ (coding->src_object, coding->dst_object))
5417         dst_end = coding->source + coding->consumed;
5418       adjusted_dst_end = dst_end - MAX_MULTIBYTE_LENGTH;
5419
5420       while (buf < buf_end)
5421         {
5422           int c = *buf++;
5423
5424           if (dst >= adjusted_dst_end)
5425             {
5426               dst = alloc_destination (coding,
5427                                        buf_end - buf + MAX_MULTIBYTE_LENGTH,
5428                                        dst);
5429               dst_end = coding->destination + coding->dst_bytes;
5430               adjusted_dst_end = dst_end - MAX_MULTIBYTE_LENGTH;
5431             }
5432           if (c >= 0)
5433             {
5434               if (coding->dst_multibyte
5435                   || ! CHAR_BYTE8_P (c))
5436                 CHAR_STRING_ADVANCE (c, dst);
5437               else
5438                 *dst++ = CHAR_TO_BYTE8 (c);
5439               produced_chars++;
5440             }
5441           else
5442             /* This is an annotation datum.  */
5443             buf -= c + 1;
5444         }
5445     }
5446   else
5447     {
5448       unsigned char *src = coding->source;
5449       unsigned char *src_end = src + coding->src_bytes;
5450       Lisp_Object eol_type;
5451
5452       eol_type = CODING_ID_EOL_TYPE (coding->id);
5453
5454       if (coding->src_multibyte != coding->dst_multibyte)
5455         {
5456           if (coding->src_multibyte)
5457             {
5458               int multibytep = 1;
5459               int consumed_chars;
5460
5461               while (1)
5462                 {
5463                   unsigned char *src_base = src;
5464                   int c;
5465
5466                   ONE_MORE_BYTE (c);
5467                   if (c == '\r')
5468                     {
5469                       if (EQ (eol_type, Qdos))
5470                         {
5471                           if (src == src_end)
5472                             {
5473                               coding->result = CODING_RESULT_INSUFFICIENT_SRC;
5474                               goto no_more_source;
5475                             }
5476                           if (*src == '\n')
5477                             c = *src++;
5478                         }
5479                       else if (EQ (eol_type, Qmac))
5480                         c = '\n';
5481                     }
5482                   if (dst == dst_end)
5483                     {
5484                       coding->consumed = src - coding->source;
5485
5486                     if (EQ (coding->src_object, coding->dst_object))
5487                       dst_end = src;
5488                     if (dst == dst_end)
5489                       {
5490                         dst = alloc_destination (coding, src_end - src + 1,
5491                                                  dst);
5492                         dst_end = coding->destination + coding->dst_bytes;
5493                         coding_set_source (coding);
5494                         src = coding->source + coding->consumed;
5495                         src_end = coding->source + coding->src_bytes;
5496                       }
5497                     }
5498                   *dst++ = c;
5499                   produced_chars++;
5500                 }
5501             no_more_source:
5502               ;
5503             }
5504           else
5505             while (src < src_end)
5506               {
5507                 int multibytep = 1;
5508                 int c = *src++;
5509
5510                 if (c == '\r')
5511                   {
5512                     if (EQ (eol_type, Qdos))
5513                       {
5514                         if (src < src_end
5515                             && *src == '\n')
5516                           c = *src++;
5517                       }
5518                     else if (EQ (eol_type, Qmac))
5519                       c = '\n';
5520                   }
5521                 if (dst >= dst_end - 1)
5522                   {
5523                     coding->consumed = src - coding->source;
5524
5525                     if (EQ (coding->src_object, coding->dst_object))
5526                       dst_end = src;
5527                     if (dst >= dst_end - 1)
5528                       {
5529                         dst = alloc_destination (coding, src_end - src + 2,
5530                                                  dst);
5531                         dst_end = coding->destination + coding->dst_bytes;
5532                         coding_set_source (coding);
5533                         src = coding->source + coding->consumed;
5534                         src_end = coding->source + coding->src_bytes;
5535                       }
5536                   }
5537                 EMIT_ONE_BYTE (c);
5538               }
5539         }
5540       else
5541         {
5542           if (!EQ (coding->src_object, coding->dst_object))
5543             {
5544               int require = coding->src_bytes - coding->dst_bytes;
5545
5546               if (require > 0)
5547                 {
5548                   EMACS_INT offset = src - coding->source;
5549
5550                   dst = alloc_destination (coding, require, dst);
5551                   coding_set_source (coding);
5552                   src = coding->source + offset;
5553                   src_end = coding->source + coding->src_bytes;
5554                 }
5555             }
5556           produced_chars = coding->src_chars;
5557           while (src < src_end)
5558             {
5559               int c = *src++;
5560
5561               if (c == '\r')
5562                 {
5563                   if (EQ (eol_type, Qdos))
5564                     {
5565                       if (src < src_end
5566                           && *src == '\n')
5567                         c = *src++;
5568                       produced_chars--;
5569                     }
5570                   else if (EQ (eol_type, Qmac))
5571                     c = '\n';
5572                 }
5573               *dst++ = c;
5574             }
5575         }
5576       coding->consumed = coding->src_bytes;
5577       coding->consumed_char = coding->src_chars;
5578     }
5579
5580   produced = dst - (coding->destination + coding->produced);
5581   if (BUFFERP (coding->dst_object))
5582     insert_from_gap (produced_chars, produced);
5583   coding->produced += produced;
5584   coding->produced_char += produced_chars;
5585   return produced_chars;
5586 }
5587
5588 /* Compose text in CODING->object according to the annotation data at
5589    CHARBUF.  CHARBUF is an array:
5590      [ -LENGTH ANNOTATION_MASK FROM TO METHOD COMP_LEN [ COMPONENTS... ] ]
5591  */
5592
5593 static INLINE void
5594 produce_composition (coding, charbuf)
5595      struct coding_system *coding;
5596      int *charbuf;
5597 {
5598   int len;
5599   EMACS_INT from, to;
5600   enum composition_method method;
5601   Lisp_Object components;
5602
5603   len = -charbuf[0];
5604   from = coding->dst_pos + charbuf[2];
5605   to = coding->dst_pos + charbuf[3];
5606   method = (enum composition_method) (charbuf[4]);
5607
5608   if (method == COMPOSITION_RELATIVE)
5609     components = Qnil;
5610   else
5611     {
5612       Lisp_Object args[MAX_COMPOSITION_COMPONENTS * 2 - 1];
5613       int i;
5614
5615       len -= 5;
5616       charbuf += 5;
5617       for (i = 0; i < len; i++)
5618         args[i] = make_number (charbuf[i]);
5619       components = (method == COMPOSITION_WITH_ALTCHARS
5620                     ? Fstring (len, args) : Fvector (len, args));
5621     }
5622   compose_text (from, to, components, Qnil, coding->dst_object);
5623 }
5624
5625
5626 /* Put `charset' property on text in CODING->object according to
5627    the annotation data at CHARBUF.  CHARBUF is an array:
5628      [ -LENGTH ANNOTATION_MASK FROM TO CHARSET-ID ]
5629  */
5630
5631 static INLINE void
5632 produce_charset (coding, charbuf)
5633      struct coding_system *coding;
5634      int *charbuf;
5635 {
5636   EMACS_INT from = coding->dst_pos + charbuf[2];
5637   EMACS_INT to = coding->dst_pos + charbuf[3];
5638   struct charset *charset = CHARSET_FROM_ID (charbuf[4]);
5639
5640   Fput_text_property (make_number (from), make_number (to),
5641                       Qcharset, CHARSET_NAME (charset),
5642                       coding->dst_object);
5643 }
5644
5645
5646 #define CHARBUF_SIZE 0x4000
5647
5648 #define ALLOC_CONVERSION_WORK_AREA(coding)                              \
5649   do {                                                                  \
5650     int size = CHARBUF_SIZE;;                                           \
5651                                                                         \
5652     coding->charbuf = NULL;                                             \
5653     while (size > 1024)                                                 \
5654       {                                                                 \
5655         coding->charbuf = (int *) alloca (sizeof (int) * size);         \
5656         if (coding->charbuf)                                            \
5657           break;                                                        \
5658         size >>= 1;                                                     \
5659       }                                                                 \
5660     if (! coding->charbuf)                                              \
5661       {                                                                 \
5662         coding->result = CODING_RESULT_INSUFFICIENT_MEM;                \
5663         return coding->result;                                          \
5664       }                                                                 \
5665     coding->charbuf_size = size;                                        \
5666   } while (0)
5667
5668
5669 static void
5670 produce_annotation (coding)
5671      struct coding_system *coding;
5672 {
5673   int *charbuf = coding->charbuf;
5674   int *charbuf_end = charbuf + coding->charbuf_used;
5675
5676   if (NILP (coding->dst_object))
5677     return;
5678
5679   while (charbuf < charbuf_end)
5680     {
5681       if (*charbuf >= 0)
5682         charbuf++;
5683       else
5684         {
5685           int len = -*charbuf;
5686           switch (charbuf[1])
5687             {
5688             case CODING_ANNOTATE_COMPOSITION_MASK:
5689               produce_composition (coding, charbuf);
5690               break;
5691             case CODING_ANNOTATE_CHARSET_MASK:
5692               produce_charset (coding, charbuf);
5693               break;
5694             default:
5695               abort ();
5696             }
5697           charbuf += len;
5698         }
5699     }
5700 }
5701
5702 /* Decode the data at CODING->src_object into CODING->dst_object.
5703    CODING->src_object is a buffer, a string, or nil.
5704    CODING->dst_object is a buffer.
5705
5706    If CODING->src_object is a buffer, it must be the current buffer.
5707    In this case, if CODING->src_pos is positive, it is a position of
5708    the source text in the buffer, otherwise, the source text is in the
5709    gap area of the buffer, and CODING->src_pos specifies the offset of
5710    the text from GPT (which must be the same as PT).  If this is the
5711    same buffer as CODING->dst_object, CODING->src_pos must be
5712    negative.
5713
5714    If CODING->src_object is a string, CODING->src_pos in an index to
5715    that string.
5716
5717    If CODING->src_object is nil, CODING->source must already point to
5718    the non-relocatable memory area.  In this case, CODING->src_pos is
5719    an offset from CODING->source.
5720
5721    The decoded data is inserted at the current point of the buffer
5722    CODING->dst_object.
5723 */
5724
5725 static int
5726 decode_coding (coding)
5727      struct coding_system *coding;
5728 {
5729   Lisp_Object attrs;
5730
5731   if (BUFFERP (coding->src_object)
5732       && coding->src_pos > 0
5733       && coding->src_pos < GPT
5734       && coding->src_pos + coding->src_chars > GPT)
5735     move_gap_both (coding->src_pos, coding->src_pos_byte);
5736
5737   if (BUFFERP (coding->dst_object))
5738     {
5739       if (current_buffer != XBUFFER (coding->dst_object))
5740         set_buffer_internal (XBUFFER (coding->dst_object));
5741       if (GPT != PT)
5742         move_gap_both (PT, PT_BYTE);
5743     }
5744
5745   coding->consumed = coding->consumed_char = 0;
5746   coding->produced = coding->produced_char = 0;
5747   coding->chars_at_source = 0;
5748   coding->result = CODING_RESULT_SUCCESS;
5749   coding->errors = 0;
5750
5751   ALLOC_CONVERSION_WORK_AREA (coding);
5752
5753   attrs = CODING_ID_ATTRS (coding->id);
5754
5755   do
5756     {
5757       coding_set_source (coding);
5758       coding->annotated = 0;
5759       (*(coding->decoder)) (coding);
5760       if (!NILP (CODING_ATTR_DECODE_TBL (attrs)))
5761         translate_chars (coding, CODING_ATTR_DECODE_TBL (attrs));
5762       else if (!NILP (Vstandard_translation_table_for_decode))
5763         translate_chars (coding, Vstandard_translation_table_for_decode);
5764       coding_set_destination (coding);
5765       produce_chars (coding);
5766       if (coding->annotated)
5767         produce_annotation (coding);
5768     }
5769   while (coding->consumed < coding->src_bytes
5770          && ! coding->result);
5771
5772   if (EQ (CODING_ATTR_TYPE (CODING_ID_ATTRS (coding->id)), Qccl)
5773       && SYMBOLP (CODING_ID_EOL_TYPE (coding->id))
5774       && ! EQ (CODING_ID_EOL_TYPE (coding->id), Qunix))
5775     decode_eol (coding);
5776
5777   coding->carryover_bytes = 0;
5778   if (coding->consumed < coding->src_bytes)
5779     {
5780       int nbytes = coding->src_bytes - coding->consumed;
5781       unsigned char *src;
5782
5783       coding_set_source (coding);
5784       coding_set_destination (coding);
5785       src = coding->source + coding->consumed;
5786
5787       if (coding->mode & CODING_MODE_LAST_BLOCK)
5788         {
5789           /* Flush out unprocessed data as binary chars.  We are sure
5790              that the number of data is less than the size of
5791              coding->charbuf.  */
5792           while (nbytes-- > 0)
5793             {
5794               int c = *src++;
5795
5796               coding->charbuf[coding->charbuf_used++] = (c & 0x80 ? - c : c);
5797             }
5798           produce_chars (coding);
5799         }
5800       else
5801         {
5802           /* Record unprocessed bytes in coding->carryover.  We are
5803              sure that the number of data is less than the size of
5804              coding->carryover.  */
5805           unsigned char *p = coding->carryover;
5806
5807           coding->carryover_bytes = nbytes;
5808           while (nbytes-- > 0)
5809             *p++ = *src++;
5810         }
5811       coding->consumed = coding->src_bytes;
5812     }
5813
5814   return coding->result;
5815 }
5816
5817
5818 /* Extract an annotation datum from a composition starting at POS and
5819    ending before LIMIT of CODING->src_object (buffer or string), store
5820    the data in BUF, set *STOP to a starting position of the next
5821    composition (if any) or to LIMIT, and return the address of the
5822    next element of BUF.
5823
5824    If such an annotation is not found, set *STOP to a starting
5825    position of a composition after POS (if any) or to LIMIT, and
5826    return BUF.  */
5827
5828 static INLINE int *
5829 handle_composition_annotation (pos, limit, coding, buf, stop)
5830      EMACS_INT pos, limit;
5831      struct coding_system *coding;
5832      int *buf;
5833      EMACS_INT *stop;
5834 {
5835   EMACS_INT start, end;
5836   Lisp_Object prop;
5837
5838   if (! find_composition (pos, limit, &start, &end, &prop, coding->src_object)
5839       || end > limit)
5840     *stop = limit;
5841   else if (start > pos)
5842     *stop = start;
5843   else
5844     {
5845       if (start == pos)
5846         {
5847           /* We found a composition.  Store the corresponding
5848              annotation data in BUF.  */
5849           int *head = buf;
5850           enum composition_method method = COMPOSITION_METHOD (prop);
5851           int nchars = COMPOSITION_LENGTH (prop);
5852
5853           ADD_COMPOSITION_DATA (buf, 0, nchars, method);
5854           if (method != COMPOSITION_RELATIVE)
5855             {
5856               Lisp_Object components;
5857               int len, i, i_byte;
5858
5859               components = COMPOSITION_COMPONENTS (prop);
5860               if (VECTORP (components))
5861                 {
5862                   len = XVECTOR (components)->size;
5863                   for (i = 0; i < len; i++)
5864                     *buf++ = XINT (AREF (components, i));
5865                 }
5866               else if (STRINGP (components))
5867                 {
5868                   len = XSTRING (components)->size;
5869                   i = i_byte = 0;
5870                   while (i < len)
5871                     {
5872                       FETCH_STRING_CHAR_ADVANCE (*buf, components, i, i_byte);
5873                       buf++;
5874                     }
5875                 }
5876               else if (INTEGERP (components))
5877                 {
5878                   len = 1;
5879                   *buf++ = XINT (components);
5880                 }
5881               else if (CONSP (components))
5882                 {
5883                   for (len = 0; CONSP (components);
5884                        len++, components = XCDR (components))
5885                     *buf++ = XINT (XCAR (components));
5886                 }
5887               else
5888                 abort ();
5889               *head -= len;
5890             }
5891         }
5892
5893       if (find_composition (end, limit, &start, &end, &prop,
5894                             coding->src_object)
5895           && end <= limit)
5896         *stop = start;
5897       else
5898         *stop = limit;
5899     }
5900   return buf;
5901 }
5902
5903
5904 /* Extract an annotation datum from a text property `charset' at POS of
5905    CODING->src_object (buffer of string), store the data in BUF, set
5906    *STOP to the position where the value of `charset' property changes
5907    (limiting by LIMIT), and return the address of the next element of
5908    BUF.
5909
5910    If the property value is nil, set *STOP to the position where the
5911    property value is non-nil (limiting by LIMIT), and return BUF.  */
5912
5913 static INLINE int *
5914 handle_charset_annotation (pos, limit, coding, buf, stop)
5915      EMACS_INT pos, limit;
5916      struct coding_system *coding;
5917      int *buf;
5918      EMACS_INT *stop;
5919 {
5920   Lisp_Object val, next;
5921   int id;
5922
5923   val = Fget_text_property (make_number (pos), Qcharset, coding->src_object);
5924   if (! NILP (val) && CHARSETP (val))
5925     id = XINT (CHARSET_SYMBOL_ID (val));
5926   else
5927     id = -1;
5928   ADD_CHARSET_DATA (buf, 0, 0, id);
5929   next = Fnext_single_property_change (make_number (pos), Qcharset,
5930                                        coding->src_object,
5931                                        make_number (limit));
5932   *stop = XINT (next);
5933   return buf;
5934 }
5935
5936
5937 static void
5938 consume_chars (coding)
5939      struct coding_system *coding;
5940 {
5941   int *buf = coding->charbuf;
5942   int *buf_end = coding->charbuf + coding->charbuf_size;
5943   const unsigned char *src = coding->source + coding->consumed;
5944   EMACS_INT pos = coding->src_pos + coding->consumed_char;
5945   EMACS_INT end_pos = coding->src_pos + coding->src_chars;
5946   int multibytep = coding->src_multibyte;
5947   Lisp_Object eol_type;
5948   int c;
5949   EMACS_INT stop, stop_composition, stop_charset;
5950   int id;
5951
5952   eol_type = CODING_ID_EOL_TYPE (coding->id);
5953   if (VECTORP (eol_type))
5954     eol_type = Qunix;
5955
5956   /* Note: composition handling is not yet implemented.  */
5957   coding->common_flags &= ~CODING_ANNOTATE_COMPOSITION_MASK;
5958
5959   if (coding->common_flags & CODING_ANNOTATE_COMPOSITION_MASK)
5960     stop = stop_composition = pos;
5961   else
5962     stop = stop_composition = end_pos;
5963   if (coding->common_flags & CODING_ANNOTATE_CHARSET_MASK)
5964     stop = stop_charset = pos;
5965   else
5966     stop_charset = end_pos;
5967
5968   /* Compensate for CRLF and annotation.  */
5969   buf_end -= 1 + MAX_ANNOTATION_LENGTH;
5970   while (buf < buf_end)
5971     {
5972       if (pos == stop)
5973         {
5974           int *p;
5975
5976           if (pos == end_pos)
5977             break;
5978           if (pos == stop_composition)
5979             buf = handle_composition_annotation (pos, end_pos, coding,
5980                                                  buf, &stop_composition);
5981           if (pos == stop_charset)
5982             buf = handle_charset_annotation (pos, end_pos, coding,
5983                                              buf, &stop_charset);
5984           stop = (stop_composition < stop_charset
5985                   ? stop_composition : stop_charset);
5986         }
5987
5988       if (! multibytep)
5989         c = *src++;
5990       else
5991         c = STRING_CHAR_ADVANCE (src);
5992       if ((c == '\r') && (coding->mode & CODING_MODE_SELECTIVE_DISPLAY))
5993         c = '\n';
5994       if (! EQ (eol_type, Qunix))
5995         {
5996           if (c == '\n')
5997             {
5998               if (EQ (eol_type, Qdos))
5999                 *buf++ = '\r';
6000               else
6001                 c = '\r';
6002             }
6003         }
6004       *buf++ = c;
6005       pos++;
6006     }
6007
6008   coding->consumed = src - coding->source;
6009   coding->consumed_char = pos - coding->src_pos;
6010   coding->charbuf_used = buf - coding->charbuf;
6011   coding->chars_at_source = 0;
6012 }
6013
6014
6015 /* Encode the text at CODING->src_object into CODING->dst_object.
6016    CODING->src_object is a buffer or a string.
6017    CODING->dst_object is a buffer or nil.
6018
6019    If CODING->src_object is a buffer, it must be the current buffer.
6020    In this case, if CODING->src_pos is positive, it is a position of
6021    the source text in the buffer, otherwise. the source text is in the
6022    gap area of the buffer, and coding->src_pos specifies the offset of
6023    the text from GPT (which must be the same as PT).  If this is the
6024    same buffer as CODING->dst_object, CODING->src_pos must be
6025    negative and CODING should not have `pre-write-conversion'.
6026
6027    If CODING->src_object is a string, CODING should not have
6028    `pre-write-conversion'.
6029
6030    If CODING->dst_object is a buffer, the encoded data is inserted at
6031    the current point of that buffer.
6032
6033    If CODING->dst_object is nil, the encoded data is placed at the
6034    memory area specified by CODING->destination.  */
6035
6036 static int
6037 encode_coding (coding)
6038      struct coding_system *coding;
6039 {
6040   Lisp_Object attrs;
6041
6042   attrs = CODING_ID_ATTRS (coding->id);
6043
6044   if (BUFFERP (coding->dst_object))
6045     {
6046       set_buffer_internal (XBUFFER (coding->dst_object));
6047       coding->dst_multibyte
6048         = ! NILP (current_buffer->enable_multibyte_characters);
6049     }
6050
6051   coding->consumed = coding->consumed_char = 0;
6052   coding->produced = coding->produced_char = 0;
6053   coding->result = CODING_RESULT_SUCCESS;
6054   coding->errors = 0;
6055
6056   ALLOC_CONVERSION_WORK_AREA (coding);
6057
6058   do {
6059     coding_set_source (coding);
6060     consume_chars (coding);
6061
6062     if (!NILP (CODING_ATTR_ENCODE_TBL (attrs)))
6063       translate_chars (coding, CODING_ATTR_ENCODE_TBL (attrs));
6064     else if (!NILP (Vstandard_translation_table_for_encode))
6065       translate_chars (coding, Vstandard_translation_table_for_encode);
6066
6067     coding_set_destination (coding);
6068     (*(coding->encoder)) (coding);
6069   } while (coding->consumed_char < coding->src_chars);
6070
6071   if (BUFFERP (coding->dst_object))
6072     insert_from_gap (coding->produced_char, coding->produced);
6073
6074   return (coding->result);
6075 }
6076
6077 /* Work buffer */
6078
6079 /* List of currently used working buffer.  */
6080 Lisp_Object Vcode_conversion_work_buf_list;
6081
6082 /* A working buffer used by the top level conversion.  */
6083 Lisp_Object Vcode_conversion_reused_work_buf;
6084
6085
6086 /* Return a working buffer that can be freely used by the following
6087    code conversion.  MULTIBYTEP specifies the multibyteness of the
6088    buffer.  */
6089
6090 Lisp_Object
6091 make_conversion_work_buffer (multibytep)
6092      int multibytep;
6093 {
6094   struct buffer *current = current_buffer;
6095   Lisp_Object buf;
6096
6097   if (NILP (Vcode_conversion_work_buf_list))
6098     {
6099       if (NILP (Vcode_conversion_reused_work_buf))
6100         Vcode_conversion_reused_work_buf
6101           = Fget_buffer_create (build_string (" *code-conversion-work*"));
6102       Vcode_conversion_work_buf_list
6103         = Fcons (Vcode_conversion_reused_work_buf, Qnil);
6104     }
6105   else
6106     {
6107       int depth = XINT (Flength (Vcode_conversion_work_buf_list));
6108       char str[128];
6109
6110       sprintf (str, " *code-conversion-work*<%d>", depth);
6111       Vcode_conversion_work_buf_list
6112         = Fcons (Fget_buffer_create (build_string (str)),
6113                  Vcode_conversion_work_buf_list);
6114     }
6115
6116   buf = XCAR (Vcode_conversion_work_buf_list);
6117   set_buffer_internal (XBUFFER (buf));
6118   current_buffer->undo_list = Qt;
6119   Ferase_buffer ();
6120   Fset_buffer_multibyte (multibytep ? Qt : Qnil, Qnil);
6121   set_buffer_internal (current);
6122   return buf;
6123 }
6124
6125 static struct coding_system *saved_coding;
6126
6127 Lisp_Object
6128 code_conversion_restore (info)
6129      Lisp_Object info;
6130 {
6131   int depth = XINT (Flength (Vcode_conversion_work_buf_list));
6132   Lisp_Object buf;
6133
6134   if (depth > 0)
6135     {
6136       buf = XCAR (Vcode_conversion_work_buf_list);
6137       Vcode_conversion_work_buf_list = XCDR (Vcode_conversion_work_buf_list);
6138       if (depth > 1 && !NILP (Fbuffer_live_p (buf)))
6139         Fkill_buffer (buf);
6140     }
6141
6142   if (EQ (saved_coding->dst_object, Qt)
6143       && saved_coding->destination)
6144     xfree (saved_coding->destination);
6145
6146   return save_excursion_restore (info);
6147 }
6148
6149
6150 int
6151 decode_coding_gap (coding, chars, bytes)
6152      struct coding_system *coding;
6153      EMACS_INT chars, bytes;
6154 {
6155   int count = specpdl_ptr - specpdl;
6156
6157   saved_coding = coding;
6158   record_unwind_protect (code_conversion_restore, save_excursion_save ());
6159
6160   coding->src_object = Fcurrent_buffer ();
6161   coding->src_chars = chars;
6162   coding->src_bytes = bytes;
6163   coding->src_pos = -chars;
6164   coding->src_pos_byte = -bytes;
6165   coding->src_multibyte = chars < bytes;
6166   coding->dst_object = coding->src_object;
6167   coding->dst_pos = PT;
6168   coding->dst_pos_byte = PT_BYTE;
6169   coding->dst_multibyte = ! NILP (current_buffer->enable_multibyte_characters);
6170   coding->mode |= CODING_MODE_LAST_BLOCK;
6171
6172   if (CODING_REQUIRE_DETECTION (coding))
6173     detect_coding (coding);
6174
6175   decode_coding (coding);
6176
6177   unbind_to (count, Qnil);
6178   return coding->result;
6179 }
6180
6181 int
6182 encode_coding_gap (coding, chars, bytes)
6183      struct coding_system *coding;
6184      EMACS_INT chars, bytes;
6185 {
6186   int count = specpdl_ptr - specpdl;
6187   Lisp_Object buffer;
6188
6189   saved_coding = coding;
6190   record_unwind_protect (code_conversion_restore, save_excursion_save ());
6191
6192   buffer = Fcurrent_buffer ();
6193   coding->src_object = buffer;
6194   coding->src_chars = chars;
6195   coding->src_bytes = bytes;
6196   coding->src_pos = -chars;
6197   coding->src_pos_byte = -bytes;
6198   coding->src_multibyte = chars < bytes;
6199   coding->dst_object = coding->src_object;
6200   coding->dst_pos = PT;
6201   coding->dst_pos_byte = PT_BYTE;
6202
6203   encode_coding (coding);
6204
6205   unbind_to (count, Qnil);
6206   return coding->result;
6207 }
6208
6209
6210 /* Decode the text in the range FROM/FROM_BYTE and TO/TO_BYTE in
6211    SRC_OBJECT into DST_OBJECT by coding context CODING.
6212
6213    SRC_OBJECT is a buffer, a string, or Qnil.
6214
6215    If it is a buffer, the text is at point of the buffer.  FROM and TO
6216    are positions in the buffer.
6217
6218    If it is a string, the text is at the beginning of the string.
6219    FROM and TO are indices to the string.
6220
6221    If it is nil, the text is at coding->source.  FROM and TO are
6222    indices to coding->source.
6223
6224    DST_OBJECT is a buffer, Qt, or Qnil.
6225
6226    If it is a buffer, the decoded text is inserted at point of the
6227    buffer.  If the buffer is the same as SRC_OBJECT, the source text
6228    is deleted.
6229
6230    If it is Qt, a string is made from the decoded text, and
6231    set in CODING->dst_object.
6232
6233    If it is Qnil, the decoded text is stored at CODING->destination.
6234    The called must allocate CODING->dst_bytes bytes at
6235    CODING->destination by xmalloc.  If the decoded text is longer than
6236    CODING->dst_bytes, CODING->destination is relocated by xrealloc.
6237  */
6238
6239 void
6240 decode_coding_object (coding, src_object, from, from_byte, to, to_byte,
6241                       dst_object)
6242      struct coding_system *coding;
6243      Lisp_Object src_object;
6244      EMACS_INT from, from_byte, to, to_byte;
6245      Lisp_Object dst_object;
6246 {
6247   int count = specpdl_ptr - specpdl;
6248   unsigned char *destination;
6249   EMACS_INT dst_bytes;
6250   EMACS_INT chars = to - from;
6251   EMACS_INT bytes = to_byte - from_byte;
6252   Lisp_Object attrs;
6253
6254   saved_coding = coding;
6255   record_unwind_protect (code_conversion_restore, save_excursion_save ());
6256
6257   if (NILP (dst_object))
6258     {
6259       destination = coding->destination;
6260       dst_bytes = coding->dst_bytes;
6261     }
6262
6263   coding->src_object = src_object;
6264   coding->src_chars = chars;
6265   coding->src_bytes = bytes;
6266   coding->src_multibyte = chars < bytes;
6267
6268   if (STRINGP (src_object))
6269     {
6270       coding->src_pos = from;
6271       coding->src_pos_byte = from_byte;
6272     }
6273   else if (BUFFERP (src_object))
6274     {
6275       set_buffer_internal (XBUFFER (src_object));
6276       if (from != GPT)
6277         move_gap_both (from, from_byte);
6278       if (EQ (src_object, dst_object))
6279         {
6280           TEMP_SET_PT_BOTH (from, from_byte);
6281           del_range_both (from, from_byte, to, to_byte, 1);
6282           coding->src_pos = -chars;
6283           coding->src_pos_byte = -bytes;
6284         }
6285       else
6286         {
6287           coding->src_pos = from;
6288           coding->src_pos_byte = from_byte;
6289         }
6290     }
6291
6292   if (CODING_REQUIRE_DETECTION (coding))
6293     detect_coding (coding);
6294   attrs = CODING_ID_ATTRS (coding->id);
6295
6296   if (! NILP (CODING_ATTR_POST_READ (attrs))
6297       || EQ (dst_object, Qt))
6298     {
6299       coding->dst_object = make_conversion_work_buffer (1);
6300       coding->dst_pos = BEG;
6301       coding->dst_pos_byte = BEG_BYTE;
6302       coding->dst_multibyte = 1;
6303     }
6304   else if (BUFFERP (dst_object))
6305     {
6306       coding->dst_object = dst_object;
6307       coding->dst_pos = BUF_PT (XBUFFER (dst_object));
6308       coding->dst_pos_byte = BUF_PT_BYTE (XBUFFER (dst_object));
6309       coding->dst_multibyte
6310         = ! NILP (XBUFFER (dst_object)->enable_multibyte_characters);
6311     }
6312   else
6313     {
6314       coding->dst_object = Qnil;
6315       coding->dst_multibyte = 1;
6316     }
6317
6318   decode_coding (coding);
6319
6320   if (BUFFERP (coding->dst_object))
6321     set_buffer_internal (XBUFFER (coding->dst_object));
6322
6323   if (! NILP (CODING_ATTR_POST_READ (attrs)))
6324     {
6325       struct gcpro gcpro1, gcpro2;
6326       EMACS_INT prev_Z = Z, prev_Z_BYTE = Z_BYTE;
6327       Lisp_Object val;
6328
6329       TEMP_SET_PT_BOTH (coding->dst_pos, coding->dst_pos_byte);
6330       GCPRO2 (coding->src_object, coding->dst_object);
6331       val = call1 (CODING_ATTR_POST_READ (attrs),
6332                    make_number (coding->produced_char));
6333       UNGCPRO;
6334       CHECK_NATNUM (val);
6335       coding->produced_char += Z - prev_Z;
6336       coding->produced += Z_BYTE - prev_Z_BYTE;
6337     }
6338
6339   if (EQ (dst_object, Qt))
6340     {
6341       coding->dst_object = Fbuffer_string ();
6342     }
6343   else if (NILP (dst_object) && BUFFERP (coding->dst_object))
6344     {
6345       set_buffer_internal (XBUFFER (coding->dst_object));
6346       if (dst_bytes < coding->produced)
6347         {
6348           destination
6349             = (unsigned char *) xrealloc (destination, coding->produced);
6350           if (! destination)
6351             {
6352               coding->result = CODING_RESULT_INSUFFICIENT_DST;
6353               unbind_to (count, Qnil);
6354               return;
6355             }
6356           if (BEGV < GPT && GPT < BEGV + coding->produced_char)
6357             move_gap_both (BEGV, BEGV_BYTE);
6358           bcopy (BEGV_ADDR, destination, coding->produced);
6359           coding->destination = destination;
6360         }
6361     }
6362
6363   unbind_to (count, Qnil);
6364 }
6365
6366
6367 void
6368 encode_coding_object (coding, src_object, from, from_byte, to, to_byte,
6369                       dst_object)
6370      struct coding_system *coding;
6371      Lisp_Object src_object;
6372      EMACS_INT from, from_byte, to, to_byte;
6373      Lisp_Object dst_object;
6374 {
6375   int count = specpdl_ptr - specpdl;
6376   EMACS_INT chars = to - from;
6377   EMACS_INT bytes = to_byte - from_byte;
6378   Lisp_Object attrs;
6379
6380   saved_coding = coding;
6381   record_unwind_protect (code_conversion_restore, save_excursion_save ());
6382
6383   coding->src_object = src_object;
6384   coding->src_chars = chars;
6385   coding->src_bytes = bytes;
6386   coding->src_multibyte = chars < bytes;
6387
6388   attrs = CODING_ID_ATTRS (coding->id);
6389
6390   if (! NILP (CODING_ATTR_PRE_WRITE (attrs)))
6391     {
6392       coding->src_object = make_conversion_work_buffer (coding->src_multibyte);
6393       set_buffer_internal (XBUFFER (coding->src_object));
6394       if (STRINGP (src_object))
6395         insert_from_string (src_object, from, from_byte, chars, bytes, 0);
6396       else if (BUFFERP (src_object))
6397         insert_from_buffer (XBUFFER (src_object), from, chars, 0);
6398       else
6399         insert_1_both (coding->source + from, chars, bytes, 0, 0, 0);
6400
6401       if (EQ (src_object, dst_object))
6402         {
6403           set_buffer_internal (XBUFFER (src_object));
6404           del_range_both (from, from_byte, to, to_byte, 1);
6405           set_buffer_internal (XBUFFER (coding->src_object));
6406         }
6407
6408       call2 (CODING_ATTR_PRE_WRITE (attrs),
6409              make_number (BEG), make_number (Z));
6410       coding->src_object = Fcurrent_buffer ();
6411       if (BEG != GPT)
6412         move_gap_both (BEG, BEG_BYTE);
6413       coding->src_chars = Z - BEG;
6414       coding->src_bytes = Z_BYTE - BEG_BYTE;
6415       coding->src_pos = BEG;
6416       coding->src_pos_byte = BEG_BYTE;
6417       coding->src_multibyte = Z < Z_BYTE;
6418     }
6419   else if (STRINGP (src_object))
6420     {
6421       coding->src_pos = from;
6422       coding->src_pos_byte = from_byte;
6423     }
6424   else if (BUFFERP (src_object))
6425     {
6426       set_buffer_internal (XBUFFER (src_object));
6427       if (EQ (src_object, dst_object))
6428         {
6429           coding->src_object = del_range_1 (from, to, 1, 1);
6430           coding->src_pos = 0;
6431           coding->src_pos_byte = 0;
6432         }
6433       else
6434         {
6435           if (from < GPT && to >= GPT)
6436             move_gap_both (from, from_byte);
6437           coding->src_pos = from;
6438           coding->src_pos_byte = from_byte;
6439         }
6440     }
6441
6442   if (BUFFERP (dst_object))
6443     {
6444       coding->dst_object = dst_object;
6445       if (EQ (src_object, dst_object))
6446         {
6447           coding->dst_pos = from;
6448           coding->dst_pos_byte = from_byte;
6449         }
6450       else
6451         {
6452           coding->dst_pos = BUF_PT (XBUFFER (dst_object));
6453           coding->dst_pos_byte = BUF_PT_BYTE (XBUFFER (dst_object));
6454         }
6455       coding->dst_multibyte
6456         = ! NILP (XBUFFER (dst_object)->enable_multibyte_characters);
6457     }
6458   else if (EQ (dst_object, Qt))
6459     {
6460       coding->dst_object = Qnil;
6461       coding->dst_bytes = coding->src_chars;
6462       if (coding->dst_bytes == 0)
6463         coding->dst_bytes = 1;
6464       coding->destination = (unsigned char *) xmalloc (coding->dst_bytes);
6465       coding->dst_multibyte = 0;
6466     }
6467   else
6468     {
6469       coding->dst_object = Qnil;
6470       coding->dst_multibyte = 0;
6471     }
6472
6473   encode_coding (coding);
6474
6475   if (EQ (dst_object, Qt))
6476     {
6477       if (BUFFERP (coding->dst_object))
6478         coding->dst_object = Fbuffer_string ();
6479       else
6480         {
6481           coding->dst_object
6482             = make_unibyte_string ((char *) coding->destination,
6483                                    coding->produced);
6484           xfree (coding->destination);
6485         }
6486     }
6487
6488   unbind_to (count, Qnil);
6489 }
6490
6491
6492 Lisp_Object
6493 preferred_coding_system ()
6494 {
6495   int id = coding_categories[coding_priorities[0]].id;
6496
6497   return CODING_ID_NAME (id);
6498 }
6499
6500 \f
6501 #ifdef emacs
6502 /*** 8. Emacs Lisp library functions ***/
6503
6504 DEFUN ("coding-system-p", Fcoding_system_p, Scoding_system_p, 1, 1, 0,
6505        doc: /* Return t if OBJECT is nil or a coding-system.
6506 See the documentation of `define-coding-system' for information
6507 about coding-system objects.  */)
6508      (obj)
6509      Lisp_Object obj;
6510 {
6511   return ((NILP (obj) || CODING_SYSTEM_P (obj)) ? Qt : Qnil);
6512 }
6513
6514 DEFUN ("read-non-nil-coding-system", Fread_non_nil_coding_system,
6515        Sread_non_nil_coding_system, 1, 1, 0,
6516        doc: /* Read a coding system from the minibuffer, prompting with string PROMPT.  */)
6517      (prompt)
6518      Lisp_Object prompt;
6519 {
6520   Lisp_Object val;
6521   do
6522     {
6523       val = Fcompleting_read (prompt, Vcoding_system_alist, Qnil,
6524                               Qt, Qnil, Qcoding_system_history, Qnil, Qnil);
6525     }
6526   while (XSTRING (val)->size == 0);
6527   return (Fintern (val, Qnil));
6528 }
6529
6530 DEFUN ("read-coding-system", Fread_coding_system, Sread_coding_system, 1, 2, 0,
6531        doc: /* Read a coding system from the minibuffer, prompting with string PROMPT.
6532 If the user enters null input, return second argument DEFAULT-CODING-SYSTEM.  */)
6533      (prompt, default_coding_system)
6534      Lisp_Object prompt, default_coding_system;
6535 {
6536   Lisp_Object val;
6537   if (SYMBOLP (default_coding_system))
6538     XSETSTRING (default_coding_system, XSYMBOL (default_coding_system)->name);
6539   val = Fcompleting_read (prompt, Vcoding_system_alist, Qnil,
6540                           Qt, Qnil, Qcoding_system_history,
6541                           default_coding_system, Qnil);
6542   return (XSTRING (val)->size == 0 ? Qnil : Fintern (val, Qnil));
6543 }
6544
6545 DEFUN ("check-coding-system", Fcheck_coding_system, Scheck_coding_system,
6546        1, 1, 0,
6547        doc: /* Check validity of CODING-SYSTEM.
6548 If valid, return CODING-SYSTEM, else signal a `coding-system-error' error.  */)
6549   (coding_system)
6550      Lisp_Object coding_system;
6551 {
6552   CHECK_SYMBOL (coding_system);
6553   if (!NILP (Fcoding_system_p (coding_system)))
6554     return coding_system;
6555   while (1)
6556     Fsignal (Qcoding_system_error, Fcons (coding_system, Qnil));
6557 }
6558
6559 \f
6560 /* Detect how the bytes at SRC of length SRC_BYTES are encoded.  If
6561    HIGHEST is nonzero, return the coding system of the highest
6562    priority among the detected coding systems.  Otherwize return a
6563    list of detected coding systems sorted by their priorities.  If
6564    MULTIBYTEP is nonzero, it is assumed that the bytes are in correct
6565    multibyte form but contains only ASCII and eight-bit chars.
6566    Otherwise, the bytes are raw bytes.
6567
6568    CODING-SYSTEM controls the detection as below:
6569
6570    If it is nil, detect both text-format and eol-format.  If the
6571    text-format part of CODING-SYSTEM is already specified
6572    (e.g. `iso-latin-1'), detect only eol-format.  If the eol-format
6573    part of CODING-SYSTEM is already specified (e.g. `undecided-unix'),
6574    detect only text-format.  */
6575
6576 Lisp_Object
6577 detect_coding_system (src, src_bytes, highest, multibytep, coding_system)
6578      unsigned char *src;
6579      int src_bytes, highest;
6580      int multibytep;
6581      Lisp_Object coding_system;
6582 {
6583   unsigned char *src_end = src + src_bytes;
6584   int mask = CATEGORY_MASK_ANY;
6585   Lisp_Object attrs, eol_type;
6586   Lisp_Object val;
6587   struct coding_system coding;
6588   int id;
6589   struct coding_detection_info detect_info;
6590
6591   if (NILP (coding_system))
6592     coding_system = Qundecided;
6593   setup_coding_system (coding_system, &coding);
6594   attrs = CODING_ID_ATTRS (coding.id);
6595   eol_type = CODING_ID_EOL_TYPE (coding.id);
6596   coding_system = CODING_ATTR_BASE_NAME (attrs);
6597
6598   coding.source = src;
6599   coding.src_bytes = src_bytes;
6600   coding.src_multibyte = multibytep;
6601   coding.consumed = 0;
6602   coding.mode |= CODING_MODE_LAST_BLOCK;
6603
6604   detect_info.checked = detect_info.found = detect_info.rejected = 0;
6605
6606   /* At first, detect text-format if necessary.  */
6607   if (XINT (CODING_ATTR_CATEGORY (attrs)) == coding_category_undecided)
6608     {
6609       enum coding_category category;
6610       struct coding_system *this;
6611       int c, i;
6612
6613       for (; src < src_end; src++)
6614         {
6615           c = *src;
6616           if (c & 0x80
6617               || (c < 0x20 && (c == ISO_CODE_ESC
6618                                || c == ISO_CODE_SI
6619                                || c == ISO_CODE_SO)))
6620             break;
6621         }
6622       coding.head_ascii = src - coding.source;
6623
6624       if (src < src_end)
6625         for (i = 0; i < coding_category_raw_text; i++)
6626           {
6627             category = coding_priorities[i];
6628             this = coding_categories + category;
6629
6630             if (this->id < 0)
6631               {
6632                 /* No coding system of this category is defined.  */
6633                 detect_info.rejected |= (1 << category);
6634               }
6635             else if (category >= coding_category_raw_text)
6636               continue;
6637             else if (detect_info.checked & (1 << category))
6638               {
6639                 if (highest
6640                     && (detect_info.found & (1 << category)))
6641                   break;
6642               }
6643             else
6644               {
6645                 if ((*(this->detector)) (&coding, &detect_info)
6646                     && highest
6647                     && (detect_info.found & (1 << category)))
6648                   break;
6649               }
6650           }
6651
6652
6653       if (detect_info.rejected == CATEGORY_MASK_ANY)
6654         {
6655           detect_info.found = CATEGORY_MASK_RAW_TEXT;
6656           id = coding_categories[coding_category_raw_text].id;
6657           val = Fcons (make_number (id), Qnil);
6658         }
6659       else if (! detect_info.rejected && ! detect_info.found)
6660         {
6661           detect_info.found = CATEGORY_MASK_ANY;
6662           id = coding_categories[coding_category_undecided].id;
6663           val = Fcons (make_number (id), Qnil);
6664         }
6665       else if (highest)
6666         {
6667           if (detect_info.found)
6668             {
6669               detect_info.found = 1 << category;
6670               val = Fcons (make_number (this->id), Qnil);
6671             }
6672           else
6673             for (i = 0; i < coding_category_raw_text; i++)
6674               if (! (detect_info.rejected & (1 << coding_priorities[i])))
6675                 {
6676                   detect_info.found = 1 << coding_priorities[i];
6677                   id = coding_categories[coding_priorities[i]].id;
6678                   val = Fcons (make_number (id), Qnil);
6679                   break;
6680                 }
6681         }
6682       else
6683         {
6684           int mask = detect_info.rejected | detect_info.found;
6685           int found = 0;
6686           val = Qnil;
6687
6688           for (i = coding_category_raw_text - 1; i >= 0; i--)
6689             {
6690               category = coding_priorities[i];
6691               if (! (mask & (1 << category)))
6692                 {
6693                   found |= 1 << category;
6694                   id = coding_categories[category].id;
6695                   val = Fcons (make_number (id), val);
6696                 }
6697             }
6698           for (i = coding_category_raw_text - 1; i >= 0; i--)
6699             {
6700               category = coding_priorities[i];
6701               if (detect_info.found & (1 << category))
6702                 {
6703                   id = coding_categories[category].id;
6704                   val = Fcons (make_number (id), val);
6705                 }
6706             }
6707           detect_info.found |= found;
6708         }
6709     }
6710   else
6711     {
6712       detect_info.found = 1 << XINT (CODING_ATTR_CATEGORY (attrs));
6713       val = Fcons (make_number (coding.id), Qnil);
6714     }
6715
6716   /* Then, detect eol-format if necessary.  */
6717   {
6718     int normal_eol = -1, utf_16_be_eol = -1, utf_16_le_eol;
6719     Lisp_Object tail;
6720
6721     if (VECTORP (eol_type))
6722       {
6723         if (detect_info.found & ~CATEGORY_MASK_UTF_16)
6724           normal_eol = detect_eol (coding.source, src_bytes,
6725                                    coding_category_raw_text);
6726         if (detect_info.found & (CATEGORY_MASK_UTF_16_BE
6727                                  | CATEGORY_MASK_UTF_16_BE_NOSIG))
6728           utf_16_be_eol = detect_eol (coding.source, src_bytes,
6729                                       coding_category_utf_16_be);
6730         if (detect_info.found & (CATEGORY_MASK_UTF_16_LE
6731                                  | CATEGORY_MASK_UTF_16_LE_NOSIG))
6732           utf_16_le_eol = detect_eol (coding.source, src_bytes,
6733                                       coding_category_utf_16_le);
6734       }
6735     else
6736       {
6737         if (EQ (eol_type, Qunix))
6738           normal_eol = utf_16_be_eol = utf_16_le_eol = EOL_SEEN_LF;
6739         else if (EQ (eol_type, Qdos))
6740           normal_eol = utf_16_be_eol = utf_16_le_eol = EOL_SEEN_CRLF;
6741         else
6742           normal_eol = utf_16_be_eol = utf_16_le_eol = EOL_SEEN_CR;
6743       }
6744
6745     for (tail = val; CONSP (tail); tail = XCDR (tail))
6746       {
6747         enum coding_category category;
6748         int this_eol;
6749
6750         id = XINT (XCAR (tail));
6751         attrs = CODING_ID_ATTRS (id);
6752         category = XINT (CODING_ATTR_CATEGORY (attrs));
6753         eol_type = CODING_ID_EOL_TYPE (id);
6754         if (VECTORP (eol_type))
6755           {
6756             if (category == coding_category_utf_16_be
6757                 || category == coding_category_utf_16_be_nosig)
6758               this_eol = utf_16_be_eol;
6759             else if (category == coding_category_utf_16_le
6760                      || category == coding_category_utf_16_le_nosig)
6761               this_eol = utf_16_le_eol;
6762             else
6763               this_eol = normal_eol;
6764
6765             if (this_eol == EOL_SEEN_LF)
6766               XSETCAR (tail, AREF (eol_type, 0));
6767             else if (this_eol == EOL_SEEN_CRLF)
6768               XSETCAR (tail, AREF (eol_type, 1));
6769             else if (this_eol == EOL_SEEN_CR)
6770               XSETCAR (tail, AREF (eol_type, 2));
6771             else
6772               XSETCAR (tail, CODING_ID_NAME (id));
6773           }
6774         else
6775           XSETCAR (tail, CODING_ID_NAME (id));
6776       }
6777   }
6778
6779   return (highest ? XCAR (val) : val);
6780 }
6781
6782
6783 DEFUN ("detect-coding-region", Fdetect_coding_region, Sdetect_coding_region,
6784        2, 3, 0,
6785        doc: /* Detect coding system of the text in the region between START and END.
6786 Return a list of possible coding systems ordered by priority.
6787
6788 If only ASCII characters are found, it returns a list of single element
6789 `undecided' or its subsidiary coding system according to a detected
6790 end-of-line format.
6791
6792 If optional argument HIGHEST is non-nil, return the coding system of
6793 highest priority.  */)
6794      (start, end, highest)
6795      Lisp_Object start, end, highest;
6796 {
6797   int from, to;
6798   int from_byte, to_byte;
6799
6800   CHECK_NUMBER_COERCE_MARKER (start);
6801   CHECK_NUMBER_COERCE_MARKER (end);
6802
6803   validate_region (&start, &end);
6804   from = XINT (start), to = XINT (end);
6805   from_byte = CHAR_TO_BYTE (from);
6806   to_byte = CHAR_TO_BYTE (to);
6807
6808   if (from < GPT && to >= GPT)
6809     move_gap_both (to, to_byte);
6810
6811   return detect_coding_system (BYTE_POS_ADDR (from_byte),
6812                                to_byte - from_byte,
6813                                !NILP (highest),
6814                                !NILP (current_buffer
6815                                       ->enable_multibyte_characters),
6816                                Qnil);
6817 }
6818
6819 DEFUN ("detect-coding-string", Fdetect_coding_string, Sdetect_coding_string,
6820        1, 2, 0,
6821        doc: /* Detect coding system of the text in STRING.
6822 Return a list of possible coding systems ordered by priority.
6823
6824 If only ASCII characters are found, it returns a list of single element
6825 `undecided' or its subsidiary coding system according to a detected
6826 end-of-line format.
6827
6828 If optional argument HIGHEST is non-nil, return the coding system of
6829 highest priority.  */)
6830      (string, highest)
6831      Lisp_Object string, highest;
6832 {
6833   CHECK_STRING (string);
6834
6835   return detect_coding_system (XSTRING (string)->data,
6836                                STRING_BYTES (XSTRING (string)),
6837                                !NILP (highest),
6838                                STRING_MULTIBYTE (string),
6839                                Qnil);
6840 }
6841
6842
6843 static INLINE int
6844 char_encodable_p (c, attrs)
6845      int c;
6846      Lisp_Object attrs;
6847 {
6848   Lisp_Object tail;
6849   struct charset *charset;
6850
6851   for (tail = CODING_ATTR_CHARSET_LIST (attrs);
6852        CONSP (tail); tail = XCDR (tail))
6853     {
6854       charset = CHARSET_FROM_ID (XINT (XCAR (tail)));
6855       if (CHAR_CHARSET_P (c, charset))
6856         break;
6857     }
6858   return (! NILP (tail));
6859 }
6860
6861
6862 /* Return a list of coding systems that safely encode the text between
6863    START and END.  If EXCLUDE is non-nil, it is a list of coding
6864    systems not to check.  The returned list doesn't contain any such
6865    coding systems.  In any case, if the text contains only ASCII or is
6866    unibyte, return t.  */
6867
6868 DEFUN ("find-coding-systems-region-internal",
6869        Ffind_coding_systems_region_internal,
6870        Sfind_coding_systems_region_internal, 2, 3, 0,
6871        doc: /* Internal use only.  */)
6872      (start, end, exclude)
6873      Lisp_Object start, end, exclude;
6874 {
6875   Lisp_Object coding_attrs_list, safe_codings;
6876   EMACS_INT start_byte, end_byte;
6877   const unsigned char *p, *pbeg, *pend;
6878   int c;
6879   Lisp_Object tail, elt;
6880
6881   if (STRINGP (start))
6882     {
6883       if (!STRING_MULTIBYTE (start)
6884           || XSTRING (start)->size == STRING_BYTES (XSTRING (start)))
6885         return Qt;
6886       start_byte = 0;
6887       end_byte = STRING_BYTES (XSTRING (start));
6888     }
6889   else
6890     {
6891       CHECK_NUMBER_COERCE_MARKER (start);
6892       CHECK_NUMBER_COERCE_MARKER (end);
6893       if (XINT (start) < BEG || XINT (end) > Z || XINT (start) > XINT (end))
6894         args_out_of_range (start, end);
6895       if (NILP (current_buffer->enable_multibyte_characters))
6896         return Qt;
6897       start_byte = CHAR_TO_BYTE (XINT (start));
6898       end_byte = CHAR_TO_BYTE (XINT (end));
6899       if (XINT (end) - XINT (start) == end_byte - start_byte)
6900         return Qt;
6901
6902       if (XINT (start) < GPT && XINT (end) > GPT)
6903         {
6904           if ((GPT - XINT (start)) < (XINT (end) - GPT))
6905             move_gap_both (XINT (start), start_byte);
6906           else
6907             move_gap_both (XINT (end), end_byte);
6908         }
6909     }
6910
6911   coding_attrs_list = Qnil;
6912   for (tail = Vcoding_system_list; CONSP (tail); tail = XCDR (tail))
6913     if (NILP (exclude)
6914         || NILP (Fmemq (XCAR (tail), exclude)))
6915       {
6916         Lisp_Object attrs;
6917
6918         attrs = AREF (CODING_SYSTEM_SPEC (XCAR (tail)), 0);
6919         if (EQ (XCAR (tail), CODING_ATTR_BASE_NAME (attrs))
6920             && ! EQ (CODING_ATTR_TYPE (attrs), Qundecided))
6921           coding_attrs_list = Fcons (attrs, coding_attrs_list);
6922       }
6923
6924   if (STRINGP (start))
6925     p = pbeg = XSTRING (start)->data;
6926   else
6927     p = pbeg = BYTE_POS_ADDR (start_byte);
6928   pend = p + (end_byte - start_byte);
6929
6930   while (p < pend && ASCII_BYTE_P (*p)) p++;
6931   while (p < pend && ASCII_BYTE_P (*(pend - 1))) pend--;
6932
6933   while (p < pend)
6934     {
6935       if (ASCII_BYTE_P (*p))
6936         p++;
6937       else
6938         {
6939           c = STRING_CHAR_ADVANCE (p);
6940
6941           charset_map_loaded = 0;
6942           for (tail = coding_attrs_list; CONSP (tail);)
6943             {
6944               elt = XCAR (tail);
6945               if (NILP (elt))
6946                 tail = XCDR (tail);
6947               else if (char_encodable_p (c, elt))
6948                 tail = XCDR (tail);
6949               else if (CONSP (XCDR (tail)))
6950                 {
6951                   XSETCAR (tail, XCAR (XCDR (tail)));
6952                   XSETCDR (tail, XCDR (XCDR (tail)));
6953                 }
6954               else
6955                 {
6956                   XSETCAR (tail, Qnil);
6957                   tail = XCDR (tail);
6958                 }
6959             }
6960           if (charset_map_loaded)
6961             {
6962               EMACS_INT p_offset = p - pbeg, pend_offset = pend - pbeg;
6963
6964               if (STRINGP (start))
6965                 pbeg = XSTRING (start)->data;
6966               else
6967                 pbeg = BYTE_POS_ADDR (start_byte);
6968               p = pbeg + p_offset;
6969               pend = pbeg + pend_offset;
6970             }
6971         }
6972     }
6973
6974   safe_codings = Qnil;
6975   for (tail = coding_attrs_list; CONSP (tail); tail = XCDR (tail))
6976     if (! NILP (XCAR (tail)))
6977       safe_codings = Fcons (CODING_ATTR_BASE_NAME (XCAR (tail)), safe_codings);
6978
6979   return safe_codings;
6980 }
6981
6982
6983 DEFUN ("check-coding-systems-region", Fcheck_coding_systems_region,
6984        Scheck_coding_systems_region, 3, 3, 0,
6985        doc: /* Check if the region is encodable by coding systems.
6986
6987 START and END are buffer positions specifying the region.
6988 CODING-SYSTEM-LIST is a list of coding systems to check.
6989
6990 The value is an alist ((CODING-SYSTEM POS0 POS1 ...) ...), where
6991 CODING-SYSTEM is a member of CODING-SYSTEM-LIst and can't encode the
6992 whole region, POS0, POS1, ... are buffer positions where non-encodable
6993 characters are found.
6994
6995 If all coding systems in CODING-SYSTEM-LIST can encode the region, the
6996 value is nil.
6997
6998 START may be a string.  In that case, check if the string is
6999 encodable, and the value contains indices to the string instead of
7000 buffer positions.  END is ignored.  */)
7001      (start, end, coding_system_list)
7002      Lisp_Object start, end, coding_system_list;
7003 {
7004   Lisp_Object list;
7005   EMACS_INT start_byte, end_byte;
7006   int pos;
7007   const unsigned char *p, *pbeg, *pend;
7008   int c;
7009   Lisp_Object tail, elt;
7010
7011   if (STRINGP (start))
7012     {
7013       if (!STRING_MULTIBYTE (start)
7014           && XSTRING (start)->size != STRING_BYTES (XSTRING (start)))
7015         return Qnil;
7016       start_byte = 0;
7017       end_byte = STRING_BYTES (XSTRING (start));
7018       pos = 0;
7019     }
7020   else
7021     {
7022       CHECK_NUMBER_COERCE_MARKER (start);
7023       CHECK_NUMBER_COERCE_MARKER (end);
7024       if (XINT (start) < BEG || XINT (end) > Z || XINT (start) > XINT (end))
7025         args_out_of_range (start, end);
7026       if (NILP (current_buffer->enable_multibyte_characters))
7027         return Qnil;
7028       start_byte = CHAR_TO_BYTE (XINT (start));
7029       end_byte = CHAR_TO_BYTE (XINT (end));
7030       if (XINT (end) - XINT (start) == end_byte - start_byte)
7031         return Qt;
7032
7033       if (XINT (start) < GPT && XINT (end) > GPT)
7034         {
7035           if ((GPT - XINT (start)) < (XINT (end) - GPT))
7036             move_gap_both (XINT (start), start_byte);
7037           else
7038             move_gap_both (XINT (end), end_byte);
7039         }
7040       pos = XINT (start);
7041     }
7042
7043   list = Qnil;
7044   for (tail = coding_system_list; CONSP (tail); tail = XCDR (tail))
7045     {
7046       elt = XCAR (tail);
7047       list = Fcons (Fcons (elt, Fcons (AREF (CODING_SYSTEM_SPEC (elt), 0),
7048                                        Qnil)),
7049                     list);
7050     }
7051
7052   if (STRINGP (start))
7053     p = pbeg = XSTRING (start)->data;
7054   else
7055     p = pbeg = BYTE_POS_ADDR (start_byte);
7056   pend = p + (end_byte - start_byte);
7057
7058   while (p < pend && ASCII_BYTE_P (*p)) p++, pos++;
7059   while (p < pend && ASCII_BYTE_P (*(pend - 1))) pend--;
7060
7061   while (p < pend)
7062     {
7063       if (ASCII_BYTE_P (*p))
7064         p++;
7065       else
7066         {
7067           c = STRING_CHAR_ADVANCE (p);
7068
7069           charset_map_loaded = 0;
7070           for (tail = list; CONSP (tail); tail = XCDR (tail))
7071             {
7072               elt = XCDR (XCAR (tail));
7073               if (! char_encodable_p (c, XCAR (elt)))
7074                 XSETCDR (elt, Fcons (make_number (pos), XCDR (elt)));
7075             }
7076           if (charset_map_loaded)
7077             {
7078               EMACS_INT p_offset = p - pbeg, pend_offset = pend - pbeg;
7079
7080               if (STRINGP (start))
7081                 pbeg = XSTRING (start)->data;
7082               else
7083                 pbeg = BYTE_POS_ADDR (start_byte);
7084               p = pbeg + p_offset;
7085               pend = pbeg + pend_offset;
7086             }
7087         }
7088       pos++;
7089     }
7090
7091   tail = list;
7092   list = Qnil;
7093   for (; CONSP (tail); tail = XCDR (tail))
7094     {
7095       elt = XCAR (tail);
7096       if (CONSP (XCDR (XCDR (elt))))
7097         list = Fcons (Fcons (XCAR (elt), Fnreverse (XCDR (XCDR (elt)))),
7098                       list);
7099     }
7100
7101   return list;
7102 }
7103
7104
7105
7106 Lisp_Object
7107 code_convert_region (start, end, coding_system, dst_object, encodep, norecord)
7108      Lisp_Object start, end, coding_system, dst_object;
7109      int encodep, norecord;
7110 {
7111   struct coding_system coding;
7112   EMACS_INT from, from_byte, to, to_byte;
7113   Lisp_Object src_object;
7114
7115   CHECK_NUMBER_COERCE_MARKER (start);
7116   CHECK_NUMBER_COERCE_MARKER (end);
7117   if (NILP (coding_system))
7118     coding_system = Qno_conversion;
7119   else
7120     CHECK_CODING_SYSTEM (coding_system);
7121   src_object = Fcurrent_buffer ();
7122   if (NILP (dst_object))
7123     dst_object = src_object;
7124   else if (! EQ (dst_object, Qt))
7125     CHECK_BUFFER (dst_object);
7126
7127   validate_region (&start, &end);
7128   from = XFASTINT (start);
7129   from_byte = CHAR_TO_BYTE (from);
7130   to = XFASTINT (end);
7131   to_byte = CHAR_TO_BYTE (to);
7132
7133   setup_coding_system (coding_system, &coding);
7134   coding.mode |= CODING_MODE_LAST_BLOCK;
7135
7136   if (encodep)
7137     encode_coding_object (&coding, src_object, from, from_byte, to, to_byte,
7138                           dst_object);
7139   else
7140     decode_coding_object (&coding, src_object, from, from_byte, to, to_byte,
7141                           dst_object);
7142   if (! norecord)
7143     Vlast_coding_system_used = CODING_ID_NAME (coding.id);
7144
7145   if (coding.result != CODING_RESULT_SUCCESS)
7146     error ("Code conversion error: %d", coding.result);
7147
7148   return (BUFFERP (dst_object)
7149           ? make_number (coding.produced_char)
7150           : coding.dst_object);
7151 }
7152
7153
7154 DEFUN ("decode-coding-region", Fdecode_coding_region, Sdecode_coding_region,
7155        3, 4, "r\nzCoding system: ",
7156        doc: /* Decode the current region from the specified coding system.
7157 When called from a program, takes four arguments:
7158         START, END, CODING-SYSTEM, and DESTINATION.
7159 START and END are buffer positions.
7160
7161 Optional 4th arguments DESTINATION specifies where the decoded text goes.
7162 If nil, the region between START and END is replace by the decoded text.
7163 If buffer, the decoded text is inserted in the buffer.
7164 If t, the decoded text is returned.
7165
7166 This function sets `last-coding-system-used' to the precise coding system
7167 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
7168 not fully specified.)
7169 It returns the length of the decoded text.  */)
7170      (start, end, coding_system, destination)
7171      Lisp_Object start, end, coding_system, destination;
7172 {
7173   return code_convert_region (start, end, coding_system, destination, 0, 0);
7174 }
7175
7176 DEFUN ("encode-coding-region", Fencode_coding_region, Sencode_coding_region,
7177        3, 4, "r\nzCoding system: ",
7178        doc: /* Encode the current region by specified coding system.
7179 When called from a program, takes three arguments:
7180 START, END, and CODING-SYSTEM.  START and END are buffer positions.
7181
7182 Optional 4th arguments DESTINATION specifies where the encoded text goes.
7183 If nil, the region between START and END is replace by the encoded text.
7184 If buffer, the encoded text is inserted in the buffer.
7185 If t, the encoded text is returned.
7186
7187 This function sets `last-coding-system-used' to the precise coding system
7188 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
7189 not fully specified.)
7190 It returns the length of the encoded text.  */)
7191   (start, end, coding_system, destination)
7192      Lisp_Object start, end, coding_system, destination;
7193 {
7194   return code_convert_region (start, end, coding_system, destination, 1, 0);
7195 }
7196
7197 Lisp_Object
7198 code_convert_string (string, coding_system, dst_object,
7199                      encodep, nocopy, norecord)
7200      Lisp_Object string, coding_system, dst_object;
7201      int encodep, nocopy, norecord;
7202 {
7203   struct coding_system coding;
7204   EMACS_INT chars, bytes;
7205
7206   CHECK_STRING (string);
7207   if (NILP (coding_system))
7208     {
7209       if (! norecord)
7210         Vlast_coding_system_used = Qno_conversion;
7211       if (NILP (dst_object))
7212         return (nocopy ? Fcopy_sequence (string) : string);
7213     }
7214
7215   if (NILP (coding_system))
7216     coding_system = Qno_conversion;
7217   else
7218     CHECK_CODING_SYSTEM (coding_system);
7219   if (NILP (dst_object))
7220     dst_object = Qt;
7221   else if (! EQ (dst_object, Qt))
7222     CHECK_BUFFER (dst_object);
7223
7224   setup_coding_system (coding_system, &coding);
7225   coding.mode |= CODING_MODE_LAST_BLOCK;
7226   chars = XSTRING (string)->size;
7227   bytes = STRING_BYTES (XSTRING (string));
7228   if (encodep)
7229     encode_coding_object (&coding, string, 0, 0, chars, bytes, dst_object);
7230   else
7231     decode_coding_object (&coding, string, 0, 0, chars, bytes, dst_object);
7232   if (! norecord)
7233     Vlast_coding_system_used = CODING_ID_NAME (coding.id);
7234
7235   if (coding.result != CODING_RESULT_SUCCESS)
7236     error ("Code conversion error: %d", coding.result);
7237
7238   return (BUFFERP (dst_object)
7239           ? make_number (coding.produced_char)
7240           : coding.dst_object);
7241 }
7242
7243
7244 /* Encode or decode STRING according to CODING_SYSTEM.
7245    Do not set Vlast_coding_system_used.
7246
7247    This function is called only from macros DECODE_FILE and
7248    ENCODE_FILE, thus we ignore character composition.  */
7249
7250 Lisp_Object
7251 code_convert_string_norecord (string, coding_system, encodep)
7252      Lisp_Object string, coding_system;
7253      int encodep;
7254 {
7255   return code_convert_string (string, coding_system, Qt, encodep, 0, 1);
7256 }
7257
7258
7259 DEFUN ("decode-coding-string", Fdecode_coding_string, Sdecode_coding_string,
7260        2, 4, 0,
7261        doc: /* Decode STRING which is encoded in CODING-SYSTEM, and return the result.
7262
7263 Optional third arg NOCOPY non-nil means it is OK to return STRING itself
7264 if the decoding operation is trivial.
7265
7266 Optional fourth arg BUFFER non-nil meant that the decoded text is
7267 inserted in BUFFER instead of returned as a string.  In this case,
7268 the return value is BUFFER.
7269
7270 This function sets `last-coding-system-used' to the precise coding system
7271 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
7272 not fully specified.  */)
7273   (string, coding_system, nocopy, buffer)
7274      Lisp_Object string, coding_system, nocopy, buffer;
7275 {
7276   return code_convert_string (string, coding_system, buffer,
7277                               0, ! NILP (nocopy), 0);
7278 }
7279
7280 DEFUN ("encode-coding-string", Fencode_coding_string, Sencode_coding_string,
7281        2, 4, 0,
7282        doc: /* Encode STRING to CODING-SYSTEM, and return the result.
7283
7284 Optional third arg NOCOPY non-nil means it is OK to return STRING
7285 itself if the encoding operation is trivial.
7286
7287 Optional fourth arg BUFFER non-nil meant that the encoded text is
7288 inserted in BUFFER instead of returned as a string.  In this case,
7289 the return value is BUFFER.
7290
7291 This function sets `last-coding-system-used' to the precise coding system
7292 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
7293 not fully specified.)  */)
7294      (string, coding_system, nocopy, buffer)
7295      Lisp_Object string, coding_system, nocopy, buffer;
7296 {
7297   return code_convert_string (string, coding_system, buffer,
7298                               1, ! NILP (nocopy), 1);
7299 }
7300
7301 \f
7302 DEFUN ("decode-sjis-char", Fdecode_sjis_char, Sdecode_sjis_char, 1, 1, 0,
7303        doc: /* Decode a Japanese character which has CODE in shift_jis encoding.
7304 Return the corresponding character.  */)
7305      (code)
7306      Lisp_Object code;
7307 {
7308   Lisp_Object spec, attrs, val;
7309   struct charset *charset_roman, *charset_kanji, *charset_kana, *charset;
7310   int c;
7311
7312   CHECK_NATNUM (code);
7313   c = XFASTINT (code);
7314   CHECK_CODING_SYSTEM_GET_SPEC (Vsjis_coding_system, spec);
7315   attrs = AREF (spec, 0);
7316
7317   if (ASCII_BYTE_P (c)
7318       && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
7319     return code;
7320
7321   val = CODING_ATTR_CHARSET_LIST (attrs);
7322   charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
7323   charset_kana = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
7324   charset_kanji = CHARSET_FROM_ID (XINT (XCAR (val)));
7325
7326   if (c <= 0x7F)
7327     charset = charset_roman;
7328   else if (c >= 0xA0 && c < 0xDF)
7329     {
7330       charset = charset_kana;
7331       c -= 0x80;
7332     }
7333   else
7334     {
7335       int s1 = c >> 8, s2 = c & 0xFF;
7336
7337       if (s1 < 0x81 || (s1 > 0x9F && s1 < 0xE0) || s1 > 0xEF
7338           || s2 < 0x40 || s2 == 0x7F || s2 > 0xFC)
7339         error ("Invalid code: %d", code);
7340       SJIS_TO_JIS (c);
7341       charset = charset_kanji;
7342     }
7343   c = DECODE_CHAR (charset, c);
7344   if (c < 0)
7345     error ("Invalid code: %d", code);
7346   return make_number (c);
7347 }
7348
7349
7350 DEFUN ("encode-sjis-char", Fencode_sjis_char, Sencode_sjis_char, 1, 1, 0,
7351        doc: /* Encode a Japanese character CHAR to shift_jis encoding.
7352 Return the corresponding code in SJIS.  */)
7353      (ch)
7354     Lisp_Object ch;
7355 {
7356   Lisp_Object spec, attrs, charset_list;
7357   int c;
7358   struct charset *charset;
7359   unsigned code;
7360
7361   CHECK_CHARACTER (ch);
7362   c = XFASTINT (ch);
7363   CHECK_CODING_SYSTEM_GET_SPEC (Vsjis_coding_system, spec);
7364   attrs = AREF (spec, 0);
7365
7366   if (ASCII_CHAR_P (c)
7367       && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
7368     return ch;
7369
7370   charset_list = CODING_ATTR_CHARSET_LIST (attrs);
7371   charset = char_charset (c, charset_list, &code);
7372   if (code == CHARSET_INVALID_CODE (charset))
7373     error ("Can't encode by shift_jis encoding: %d", c);
7374   JIS_TO_SJIS (code);
7375
7376   return make_number (code);
7377 }
7378
7379 DEFUN ("decode-big5-char", Fdecode_big5_char, Sdecode_big5_char, 1, 1, 0,
7380        doc: /* Decode a Big5 character which has CODE in BIG5 coding system.
7381 Return the corresponding character.  */)
7382      (code)
7383      Lisp_Object code;
7384 {
7385   Lisp_Object spec, attrs, val;
7386   struct charset *charset_roman, *charset_big5, *charset;
7387   int c;
7388
7389   CHECK_NATNUM (code);
7390   c = XFASTINT (code);
7391   CHECK_CODING_SYSTEM_GET_SPEC (Vbig5_coding_system, spec);
7392   attrs = AREF (spec, 0);
7393
7394   if (ASCII_BYTE_P (c)
7395       && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
7396     return code;
7397
7398   val = CODING_ATTR_CHARSET_LIST (attrs);
7399   charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
7400   charset_big5 = CHARSET_FROM_ID (XINT (XCAR (val)));
7401
7402   if (c <= 0x7F)
7403     charset = charset_roman;
7404   else
7405     {
7406       int b1 = c >> 8, b2 = c & 0x7F;
7407       if (b1 < 0xA1 || b1 > 0xFE
7408           || b2 < 0x40 || (b2 > 0x7E && b2 < 0xA1) || b2 > 0xFE)
7409         error ("Invalid code: %d", code);
7410       charset = charset_big5;
7411     }
7412   c = DECODE_CHAR (charset, (unsigned )c);
7413   if (c < 0)
7414     error ("Invalid code: %d", code);
7415   return make_number (c);
7416 }
7417
7418 DEFUN ("encode-big5-char", Fencode_big5_char, Sencode_big5_char, 1, 1, 0,
7419        doc: /* Encode the Big5 character CHAR to BIG5 coding system.
7420 Return the corresponding character code in Big5.  */)
7421      (ch)
7422      Lisp_Object ch;
7423 {
7424   Lisp_Object spec, attrs, charset_list;
7425   struct charset *charset;
7426   int c;
7427   unsigned code;
7428
7429   CHECK_CHARACTER (ch);
7430   c = XFASTINT (ch);
7431   CHECK_CODING_SYSTEM_GET_SPEC (Vbig5_coding_system, spec);
7432   attrs = AREF (spec, 0);
7433   if (ASCII_CHAR_P (c)
7434       && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
7435     return ch;
7436
7437   charset_list = CODING_ATTR_CHARSET_LIST (attrs);
7438   charset = char_charset (c, charset_list, &code);
7439   if (code == CHARSET_INVALID_CODE (charset))
7440     error ("Can't encode by Big5 encoding: %d", c);
7441
7442   return make_number (code);
7443 }
7444
7445 \f
7446 DEFUN ("set-terminal-coding-system-internal",
7447        Fset_terminal_coding_system_internal,
7448        Sset_terminal_coding_system_internal, 1, 1, 0,
7449        doc: /* Internal use only.  */)
7450      (coding_system)
7451      Lisp_Object coding_system;
7452 {
7453   CHECK_SYMBOL (coding_system);
7454   setup_coding_system (Fcheck_coding_system (coding_system),
7455                         &terminal_coding);
7456
7457   /* We had better not send unsafe characters to terminal.  */
7458   terminal_coding.mode |= CODING_MODE_SAFE_ENCODING;
7459   /* Characer composition should be disabled.  */
7460   terminal_coding.common_flags &= ~CODING_ANNOTATE_COMPOSITION_MASK;
7461   terminal_coding.src_multibyte = 1;
7462   terminal_coding.dst_multibyte = 0;
7463   return Qnil;
7464 }
7465
7466 DEFUN ("set-safe-terminal-coding-system-internal",
7467        Fset_safe_terminal_coding_system_internal,
7468        Sset_safe_terminal_coding_system_internal, 1, 1, 0,
7469        doc: /* Internal use only.  */)
7470      (coding_system)
7471      Lisp_Object coding_system;
7472 {
7473   CHECK_SYMBOL (coding_system);
7474   setup_coding_system (Fcheck_coding_system (coding_system),
7475                        &safe_terminal_coding);
7476   /* Characer composition should be disabled.  */
7477   safe_terminal_coding.common_flags &= ~CODING_ANNOTATE_COMPOSITION_MASK;
7478   safe_terminal_coding.src_multibyte = 1;
7479   safe_terminal_coding.dst_multibyte = 0;
7480   return Qnil;
7481 }
7482
7483 DEFUN ("terminal-coding-system",
7484        Fterminal_coding_system, Sterminal_coding_system, 0, 0, 0,
7485        doc: /* Return coding system specified for terminal output.  */)
7486      ()
7487 {
7488   return CODING_ID_NAME (terminal_coding.id);
7489 }
7490
7491 DEFUN ("set-keyboard-coding-system-internal",
7492        Fset_keyboard_coding_system_internal,
7493        Sset_keyboard_coding_system_internal, 1, 1, 0,
7494        doc: /* Internal use only.  */)
7495      (coding_system)
7496      Lisp_Object coding_system;
7497 {
7498   CHECK_SYMBOL (coding_system);
7499   setup_coding_system (Fcheck_coding_system (coding_system),
7500                        &keyboard_coding);
7501   /* Characer composition should be disabled.  */
7502   keyboard_coding.common_flags &= ~CODING_ANNOTATE_COMPOSITION_MASK;
7503   return Qnil;
7504 }
7505
7506 DEFUN ("keyboard-coding-system",
7507        Fkeyboard_coding_system, Skeyboard_coding_system, 0, 0, 0,
7508        doc: /* Return coding system specified for decoding keyboard input.  */)
7509      ()
7510 {
7511   return CODING_ID_NAME (keyboard_coding.id);
7512 }
7513
7514 \f
7515 DEFUN ("find-operation-coding-system", Ffind_operation_coding_system,
7516        Sfind_operation_coding_system,  1, MANY, 0,
7517        doc: /* Choose a coding system for an operation based on the target name.
7518 The value names a pair of coding systems: (DECODING-SYSTEM . ENCODING-SYSTEM).
7519 DECODING-SYSTEM is the coding system to use for decoding
7520 \(in case OPERATION does decoding), and ENCODING-SYSTEM is the coding system
7521 for encoding (in case OPERATION does encoding).
7522
7523 The first argument OPERATION specifies an I/O primitive:
7524   For file I/O, `insert-file-contents' or `write-region'.
7525   For process I/O, `call-process', `call-process-region', or `start-process'.
7526   For network I/O, `open-network-stream'.
7527
7528 The remaining arguments should be the same arguments that were passed
7529 to the primitive.  Depending on which primitive, one of those arguments
7530 is selected as the TARGET.  For example, if OPERATION does file I/O,
7531 whichever argument specifies the file name is TARGET.
7532
7533 TARGET has a meaning which depends on OPERATION:
7534   For file I/O, TARGET is a file name.
7535   For process I/O, TARGET is a process name.
7536   For network I/O, TARGET is a service name or a port number
7537
7538 This function looks up what specified for TARGET in,
7539 `file-coding-system-alist', `process-coding-system-alist',
7540 or `network-coding-system-alist' depending on OPERATION.
7541 They may specify a coding system, a cons of coding systems,
7542 or a function symbol to call.
7543 In the last case, we call the function with one argument,
7544 which is a list of all the arguments given to this function.
7545
7546 usage: (find-operation-coding-system OPERATION ARGUMENTS ...)  */)
7547      (nargs, args)
7548      int nargs;
7549      Lisp_Object *args;
7550 {
7551   Lisp_Object operation, target_idx, target, val;
7552   register Lisp_Object chain;
7553
7554   if (nargs < 2)
7555     error ("Too few arguments");
7556   operation = args[0];
7557   if (!SYMBOLP (operation)
7558       || !INTEGERP (target_idx = Fget (operation, Qtarget_idx)))
7559     error ("Invalid first arguement");
7560   if (nargs < 1 + XINT (target_idx))
7561     error ("Too few arguments for operation: %s",
7562            XSYMBOL (operation)->name->data);
7563   target = args[XINT (target_idx) + 1];
7564   if (!(STRINGP (target)
7565         || (EQ (operation, Qopen_network_stream) && INTEGERP (target))))
7566     error ("Invalid %dth argument", XINT (target_idx) + 1);
7567
7568   chain = ((EQ (operation, Qinsert_file_contents)
7569             || EQ (operation, Qwrite_region))
7570            ? Vfile_coding_system_alist
7571            : (EQ (operation, Qopen_network_stream)
7572               ? Vnetwork_coding_system_alist
7573               : Vprocess_coding_system_alist));
7574   if (NILP (chain))
7575     return Qnil;
7576
7577   for (; CONSP (chain); chain = XCDR (chain))
7578     {
7579       Lisp_Object elt;
7580
7581       elt = XCAR (chain);
7582       if (CONSP (elt)
7583           && ((STRINGP (target)
7584                && STRINGP (XCAR (elt))
7585                && fast_string_match (XCAR (elt), target) >= 0)
7586               || (INTEGERP (target) && EQ (target, XCAR (elt)))))
7587         {
7588           val = XCDR (elt);
7589           /* Here, if VAL is both a valid coding system and a valid
7590              function symbol, we return VAL as a coding system.  */
7591           if (CONSP (val))
7592             return val;
7593           if (! SYMBOLP (val))
7594             return Qnil;
7595           if (! NILP (Fcoding_system_p (val)))
7596             return Fcons (val, val);
7597           if (! NILP (Ffboundp (val)))
7598             {
7599               val = call1 (val, Flist (nargs, args));
7600               if (CONSP (val))
7601                 return val;
7602               if (SYMBOLP (val) && ! NILP (Fcoding_system_p (val)))
7603                 return Fcons (val, val);
7604             }
7605           return Qnil;
7606         }
7607     }
7608   return Qnil;
7609 }
7610
7611 DEFUN ("set-coding-system-priority", Fset_coding_system_priority,
7612        Sset_coding_system_priority, 0, MANY, 0,
7613        doc: /* Assign higher priority to the coding systems given as arguments.
7614 usage: (set-coding-system-priority CODING-SYSTEM ...)  */)
7615      (nargs, args)
7616      int nargs;
7617      Lisp_Object *args;
7618 {
7619   int i, j;
7620   int changed[coding_category_max];
7621   enum coding_category priorities[coding_category_max];
7622
7623   bzero (changed, sizeof changed);
7624
7625   for (i = j = 0; i < nargs; i++)
7626     {
7627       enum coding_category category;
7628       Lisp_Object spec, attrs;
7629
7630       CHECK_CODING_SYSTEM_GET_SPEC (args[i], spec);
7631       attrs = AREF (spec, 0);
7632       category = XINT (CODING_ATTR_CATEGORY (attrs));
7633       if (changed[category])
7634         /* Ignore this coding system because a coding system of the
7635            same category already had a higher priority.  */
7636         continue;
7637       changed[category] = 1;
7638       priorities[j++] = category;
7639       if (coding_categories[category].id >= 0
7640           && ! EQ (args[i], CODING_ID_NAME (coding_categories[category].id)))
7641         setup_coding_system (args[i], &coding_categories[category]);
7642     }
7643
7644   /* Now we have decided top J priorities.  Reflect the order of the
7645      original priorities to the remaining priorities.  */
7646
7647   for (i = j, j = 0; i < coding_category_max; i++, j++)
7648     {
7649       while (j < coding_category_max
7650              && changed[coding_priorities[j]])
7651         j++;
7652       if (j == coding_category_max)
7653         abort ();
7654       priorities[i] = coding_priorities[j];
7655     }
7656
7657   bcopy (priorities, coding_priorities, sizeof priorities);
7658   return Qnil;
7659 }
7660
7661 DEFUN ("coding-system-priority-list", Fcoding_system_priority_list,
7662        Scoding_system_priority_list, 0, 1, 0,
7663        doc: /* Return a list of coding systems ordered by their priorities.
7664 HIGHESTP non-nil means just return the highest priority one.  */)
7665      (highestp)
7666      Lisp_Object highestp;
7667 {
7668   int i;
7669   Lisp_Object val;
7670
7671   for (i = 0, val = Qnil; i < coding_category_max; i++)
7672     {
7673       enum coding_category category = coding_priorities[i];
7674       int id = coding_categories[category].id;
7675       Lisp_Object attrs;
7676
7677       if (id < 0)
7678         continue;
7679       attrs = CODING_ID_ATTRS (id);
7680       if (! NILP (highestp))
7681         return CODING_ATTR_BASE_NAME (attrs);
7682       val = Fcons (CODING_ATTR_BASE_NAME (attrs), val);
7683     }
7684   return Fnreverse (val);
7685 }
7686
7687 static char *suffixes[] = { "-unix", "-dos", "-mac" };
7688
7689 static Lisp_Object
7690 make_subsidiaries (base)
7691      Lisp_Object base;
7692 {
7693   Lisp_Object subsidiaries;
7694   int base_name_len = STRING_BYTES (XSYMBOL (base)->name);
7695   char *buf = (char *) alloca (base_name_len + 6);
7696   int i;
7697
7698   bcopy (XSYMBOL (base)->name->data, buf, base_name_len);
7699   subsidiaries = Fmake_vector (make_number (3), Qnil);
7700   for (i = 0; i < 3; i++)
7701     {
7702       bcopy (suffixes[i], buf + base_name_len, strlen (suffixes[i]) + 1);
7703       ASET (subsidiaries, i, intern (buf));
7704     }
7705   return subsidiaries;
7706 }
7707
7708
7709 DEFUN ("define-coding-system-internal", Fdefine_coding_system_internal,
7710        Sdefine_coding_system_internal, coding_arg_max, MANY, 0,
7711        doc: /* For internal use only.
7712 usage: (define-coding-system-internal ...)  */)
7713      (nargs, args)
7714      int nargs;
7715      Lisp_Object *args;
7716 {
7717   Lisp_Object name;
7718   Lisp_Object spec_vec;         /* [ ATTRS ALIASE EOL_TYPE ] */
7719   Lisp_Object attrs;            /* Vector of attributes.  */
7720   Lisp_Object eol_type;
7721   Lisp_Object aliases;
7722   Lisp_Object coding_type, charset_list, safe_charsets;
7723   enum coding_category category;
7724   Lisp_Object tail, val;
7725   int max_charset_id = 0;
7726   int i;
7727
7728   if (nargs < coding_arg_max)
7729     goto short_args;
7730
7731   attrs = Fmake_vector (make_number (coding_attr_last_index), Qnil);
7732
7733   name = args[coding_arg_name];
7734   CHECK_SYMBOL (name);
7735   CODING_ATTR_BASE_NAME (attrs) = name;
7736
7737   val = args[coding_arg_mnemonic];
7738   if (! STRINGP (val))
7739     CHECK_CHARACTER (val);
7740   CODING_ATTR_MNEMONIC (attrs) = val;
7741
7742   coding_type = args[coding_arg_coding_type];
7743   CHECK_SYMBOL (coding_type);
7744   CODING_ATTR_TYPE (attrs) = coding_type;
7745
7746   charset_list = args[coding_arg_charset_list];
7747   if (SYMBOLP (charset_list))
7748     {
7749       if (EQ (charset_list, Qiso_2022))
7750         {
7751           if (! EQ (coding_type, Qiso_2022))
7752             error ("Invalid charset-list");
7753           charset_list = Viso_2022_charset_list;
7754         }
7755       else if (EQ (charset_list, Qemacs_mule))
7756         {
7757           if (! EQ (coding_type, Qemacs_mule))
7758             error ("Invalid charset-list");
7759           charset_list = Vemacs_mule_charset_list;
7760         }
7761       for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
7762         if (max_charset_id < XFASTINT (XCAR (tail)))
7763           max_charset_id = XFASTINT (XCAR (tail));
7764     }
7765   else
7766     {
7767       charset_list = Fcopy_sequence (charset_list);
7768       for (tail = charset_list; !NILP (tail); tail = Fcdr (tail))
7769         {
7770           struct charset *charset;
7771
7772           val = Fcar (tail);
7773           CHECK_CHARSET_GET_CHARSET (val, charset);
7774           if (EQ (coding_type, Qiso_2022)
7775               ? CHARSET_ISO_FINAL (charset) < 0
7776               : EQ (coding_type, Qemacs_mule)
7777               ? CHARSET_EMACS_MULE_ID (charset) < 0
7778               : 0)
7779             error ("Can't handle charset `%s'",
7780                    XSYMBOL (CHARSET_NAME (charset))->name->data);
7781
7782           XCAR (tail) = make_number (charset->id);
7783           if (max_charset_id < charset->id)
7784             max_charset_id = charset->id;
7785         }
7786     }
7787   CODING_ATTR_CHARSET_LIST (attrs) = charset_list;
7788
7789   safe_charsets = Fmake_string (make_number (max_charset_id + 1),
7790                                 make_number (255));
7791   for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
7792     XSTRING (safe_charsets)->data[XFASTINT (XCAR (tail))] = 0;
7793   CODING_ATTR_SAFE_CHARSETS (attrs) = safe_charsets;
7794
7795   CODING_ATTR_ASCII_COMPAT (attrs) = args[coding_arg_ascii_compatible_p];
7796
7797   val = args[coding_arg_decode_translation_table];
7798   if (! NILP (val))
7799     CHECK_CHAR_TABLE (val);
7800   CODING_ATTR_DECODE_TBL (attrs) = val;
7801
7802   val = args[coding_arg_encode_translation_table];
7803   if (! NILP (val))
7804     CHECK_CHAR_TABLE (val);
7805   CODING_ATTR_ENCODE_TBL (attrs) = val;
7806
7807   val = args[coding_arg_post_read_conversion];
7808   CHECK_SYMBOL (val);
7809   CODING_ATTR_POST_READ (attrs) = val;
7810
7811   val = args[coding_arg_pre_write_conversion];
7812   CHECK_SYMBOL (val);
7813   CODING_ATTR_PRE_WRITE (attrs) = val;
7814
7815   val = args[coding_arg_default_char];
7816   if (NILP (val))
7817     CODING_ATTR_DEFAULT_CHAR (attrs) = make_number (' ');
7818   else
7819     {
7820       CHECK_CHARACTER (val);
7821       CODING_ATTR_DEFAULT_CHAR (attrs) = val;
7822     }
7823
7824   val = args[coding_arg_plist];
7825   CHECK_LIST (val);
7826   CODING_ATTR_PLIST (attrs) = val;
7827
7828   if (EQ (coding_type, Qcharset))
7829     {
7830       /* Generate a lisp vector of 256 elements.  Each element is nil,
7831          integer, or a list of charset IDs.
7832
7833          If Nth element is nil, the byte code N is invalid in this
7834          coding system.
7835
7836          If Nth element is a number NUM, N is the first byte of a
7837          charset whose ID is NUM.
7838
7839          If Nth element is a list of charset IDs, N is the first byte
7840          of one of them.  The list is sorted by dimensions of the
7841          charsets.  A charset of smaller dimension comes firtst.
7842       */
7843       val = Fmake_vector (make_number (256), Qnil);
7844
7845       for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
7846         {
7847           struct charset *charset = CHARSET_FROM_ID (XFASTINT (XCAR (tail)));
7848           int dim = CHARSET_DIMENSION (charset);
7849           int idx = (dim - 1) * 4;
7850
7851           if (CHARSET_ASCII_COMPATIBLE_P (charset))
7852             CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
7853
7854           for (i = charset->code_space[idx];
7855                i <= charset->code_space[idx + 1]; i++)
7856             {
7857               Lisp_Object tmp, tmp2;
7858               int dim2;
7859
7860               tmp = AREF (val, i);
7861               if (NILP (tmp))
7862                 tmp = XCAR (tail);
7863               else if (NUMBERP (tmp))
7864                 {
7865                   dim2 = CHARSET_DIMENSION (CHARSET_FROM_ID (XFASTINT (tmp)));
7866                   if (dim < dim2)
7867                     tmp = Fcons (XCAR (tail), Fcons (tmp, Qnil));
7868                   else
7869                     tmp = Fcons (tmp, Fcons (XCAR (tail), Qnil));
7870                 }
7871               else
7872                 {
7873                   for (tmp2 = tmp; CONSP (tmp2); tmp2 = XCDR (tmp2))
7874                     {
7875                       dim2 = CHARSET_DIMENSION (CHARSET_FROM_ID (XFASTINT (XCAR (tmp2))));
7876                       if (dim < dim2)
7877                         break;
7878                     }
7879                   if (NILP (tmp2))
7880                     tmp = nconc2 (tmp, Fcons (XCAR (tail), Qnil));
7881                   else
7882                     {
7883                       XSETCDR (tmp2, Fcons (XCAR (tmp2), XCDR (tmp2)));
7884                       XSETCAR (tmp2, XCAR (tail));
7885                     }
7886                 }
7887               ASET (val, i, tmp);
7888             }
7889         }
7890       ASET (attrs, coding_attr_charset_valids, val);
7891       category = coding_category_charset;
7892     }
7893   else if (EQ (coding_type, Qccl))
7894     {
7895       Lisp_Object valids;
7896
7897       if (nargs < coding_arg_ccl_max)
7898         goto short_args;
7899
7900       val = args[coding_arg_ccl_decoder];
7901       CHECK_CCL_PROGRAM (val);
7902       if (VECTORP (val))
7903         val = Fcopy_sequence (val);
7904       ASET (attrs, coding_attr_ccl_decoder, val);
7905
7906       val = args[coding_arg_ccl_encoder];
7907       CHECK_CCL_PROGRAM (val);
7908       if (VECTORP (val))
7909         val = Fcopy_sequence (val);
7910       ASET (attrs, coding_attr_ccl_encoder, val);
7911
7912       val = args[coding_arg_ccl_valids];
7913       valids = Fmake_string (make_number (256), make_number (0));
7914       for (tail = val; !NILP (tail); tail = Fcdr (tail))
7915         {
7916           int from, to;
7917
7918           val = Fcar (tail);
7919           if (INTEGERP (val))
7920             {
7921               from = to = XINT (val);
7922               if (from < 0 || from > 255)
7923                 args_out_of_range_3 (val, make_number (0), make_number (255));
7924             }
7925           else
7926             {
7927               CHECK_CONS (val);
7928               CHECK_NUMBER (XCAR (val));
7929               CHECK_NUMBER (XCDR (val));
7930               from = XINT (XCAR (val));
7931               if (from < 0 || from > 255)
7932                 args_out_of_range_3 (XCAR (val),
7933                                      make_number (0), make_number (255));
7934               to = XINT (XCDR (val));
7935               if (to < from || to > 255)
7936                 args_out_of_range_3 (XCDR (val),
7937                                      XCAR (val), make_number (255));
7938             }
7939           for (i = from; i <= to; i++)
7940             XSTRING (valids)->data[i] = 1;
7941         }
7942       ASET (attrs, coding_attr_ccl_valids, valids);
7943
7944       category = coding_category_ccl;
7945     }
7946   else if (EQ (coding_type, Qutf_16))
7947     {
7948       Lisp_Object bom, endian;
7949
7950       CODING_ATTR_ASCII_COMPAT (attrs) = Qnil;
7951
7952       if (nargs < coding_arg_utf16_max)
7953         goto short_args;
7954
7955       bom = args[coding_arg_utf16_bom];
7956       if (! NILP (bom) && ! EQ (bom, Qt))
7957         {
7958           CHECK_CONS (bom);
7959           CHECK_CODING_SYSTEM (XCAR (bom));
7960           CHECK_CODING_SYSTEM (XCDR (bom));
7961         }
7962       ASET (attrs, coding_attr_utf_16_bom, bom);
7963
7964       endian = args[coding_arg_utf16_endian];
7965       ASET (attrs, coding_attr_utf_16_endian, endian);
7966
7967       category = (CONSP (bom)
7968                   ? coding_category_utf_16_auto
7969                   : NILP (bom)
7970                   ? (NILP (endian)
7971                      ? coding_category_utf_16_be_nosig
7972                      : coding_category_utf_16_le_nosig)
7973                   : (NILP (endian)
7974                      ? coding_category_utf_16_be
7975                      : coding_category_utf_16_le));
7976     }
7977   else if (EQ (coding_type, Qiso_2022))
7978     {
7979       Lisp_Object initial, reg_usage, request, flags;
7980       int i, id;
7981
7982       if (nargs < coding_arg_iso2022_max)
7983         goto short_args;
7984
7985       initial = Fcopy_sequence (args[coding_arg_iso2022_initial]);
7986       CHECK_VECTOR (initial);
7987       for (i = 0; i < 4; i++)
7988         {
7989           val = Faref (initial, make_number (i));
7990           if (! NILP (val))
7991             {
7992               struct charset *charset;
7993
7994               CHECK_CHARSET_GET_CHARSET (val, charset);
7995               ASET (initial, i, make_number (CHARSET_ID (charset)));
7996               if (i == 0 && CHARSET_ASCII_COMPATIBLE_P (charset))
7997                 CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
7998             }
7999           else
8000             ASET (initial, i, make_number (-1));
8001         }
8002
8003       reg_usage = args[coding_arg_iso2022_reg_usage];
8004       CHECK_CONS (reg_usage);
8005       CHECK_NATNUM (XCAR (reg_usage));
8006       CHECK_NATNUM (XCDR (reg_usage));
8007
8008       request = Fcopy_sequence (args[coding_arg_iso2022_request]);
8009       for (tail = request; ! NILP (tail); tail = Fcdr (tail))
8010         {
8011           int id;
8012
8013           val = Fcar (tail);
8014           CHECK_CONS (val);
8015           CHECK_CHARSET_GET_ID (XCAR (val), id);
8016           CHECK_NATNUM (XCDR (val));
8017           if (XINT (XCDR (val)) >= 4)
8018             error ("Invalid graphic register number: %d", XINT (XCDR (val)));
8019           XCAR (val) = make_number (id);
8020         }
8021
8022       flags = args[coding_arg_iso2022_flags];
8023       CHECK_NATNUM (flags);
8024       i = XINT (flags);
8025       if (EQ (args[coding_arg_charset_list], Qiso_2022))
8026         flags = make_number (i | CODING_ISO_FLAG_FULL_SUPPORT);
8027
8028       ASET (attrs, coding_attr_iso_initial, initial);
8029       ASET (attrs, coding_attr_iso_usage, reg_usage);
8030       ASET (attrs, coding_attr_iso_request, request);
8031       ASET (attrs, coding_attr_iso_flags, flags);
8032       setup_iso_safe_charsets (attrs);
8033
8034       if (i & CODING_ISO_FLAG_SEVEN_BITS)
8035         category = ((i & (CODING_ISO_FLAG_LOCKING_SHIFT
8036                           | CODING_ISO_FLAG_SINGLE_SHIFT))
8037                     ? coding_category_iso_7_else
8038                     : EQ (args[coding_arg_charset_list], Qiso_2022)
8039                     ? coding_category_iso_7
8040                     : coding_category_iso_7_tight);
8041       else
8042         {
8043           int id = XINT (AREF (initial, 1));
8044
8045           category = (((i & CODING_ISO_FLAG_LOCKING_SHIFT)
8046                        || EQ (args[coding_arg_charset_list], Qiso_2022)
8047                        || id < 0)
8048                       ? coding_category_iso_8_else
8049                       : (CHARSET_DIMENSION (CHARSET_FROM_ID (id)) == 1)
8050                       ? coding_category_iso_8_1
8051                       : coding_category_iso_8_2);
8052         }
8053       if (category != coding_category_iso_8_1
8054           && category != coding_category_iso_8_2)
8055         CODING_ATTR_ASCII_COMPAT (attrs) = Qnil;
8056     }
8057   else if (EQ (coding_type, Qemacs_mule))
8058     {
8059       if (EQ (args[coding_arg_charset_list], Qemacs_mule))
8060         ASET (attrs, coding_attr_emacs_mule_full, Qt);
8061       CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
8062       category = coding_category_emacs_mule;
8063     }
8064   else if (EQ (coding_type, Qshift_jis))
8065     {
8066
8067       struct charset *charset;
8068
8069       if (XINT (Flength (charset_list)) != 3)
8070         error ("There should be just three charsets");
8071
8072       charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
8073       if (CHARSET_DIMENSION (charset) != 1)
8074         error ("Dimension of charset %s is not one",
8075                XSYMBOL (CHARSET_NAME (charset))->name->data);
8076       if (CHARSET_ASCII_COMPATIBLE_P (charset))
8077         CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
8078
8079       charset_list = XCDR (charset_list);
8080       charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
8081       if (CHARSET_DIMENSION (charset) != 1)
8082         error ("Dimension of charset %s is not one",
8083                XSYMBOL (CHARSET_NAME (charset))->name->data);
8084
8085       charset_list = XCDR (charset_list);
8086       charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
8087       if (CHARSET_DIMENSION (charset) != 2)
8088         error ("Dimension of charset %s is not two",
8089                XSYMBOL (CHARSET_NAME (charset))->name->data);
8090
8091       category = coding_category_sjis;
8092       Vsjis_coding_system = name;
8093     }
8094   else if (EQ (coding_type, Qbig5))
8095     {
8096       struct charset *charset;
8097
8098       if (XINT (Flength (charset_list)) != 2)
8099         error ("There should be just two charsets");
8100
8101       charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
8102       if (CHARSET_DIMENSION (charset) != 1)
8103         error ("Dimension of charset %s is not one",
8104                XSYMBOL (CHARSET_NAME (charset))->name->data);
8105       if (CHARSET_ASCII_COMPATIBLE_P (charset))
8106         CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
8107
8108       charset_list = XCDR (charset_list);
8109       charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
8110       if (CHARSET_DIMENSION (charset) != 2)
8111         error ("Dimension of charset %s is not two",
8112                XSYMBOL (CHARSET_NAME (charset))->name->data);
8113
8114       category = coding_category_big5;
8115       Vbig5_coding_system = name;
8116     }
8117   else if (EQ (coding_type, Qraw_text))
8118     {
8119       category = coding_category_raw_text;
8120       CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
8121     }
8122   else if (EQ (coding_type, Qutf_8))
8123     {
8124       category = coding_category_utf_8;
8125       CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
8126     }
8127   else if (EQ (coding_type, Qundecided))
8128     category = coding_category_undecided;
8129   else
8130     error ("Invalid coding system type: %s",
8131            XSYMBOL (coding_type)->name->data);
8132
8133   CODING_ATTR_CATEGORY (attrs) = make_number (category);
8134
8135   eol_type = args[coding_arg_eol_type];
8136   if (! NILP (eol_type)
8137       && ! EQ (eol_type, Qunix)
8138       && ! EQ (eol_type, Qdos)
8139       && ! EQ (eol_type, Qmac))
8140     error ("Invalid eol-type");
8141
8142   aliases = Fcons (name, Qnil);
8143
8144   if (NILP (eol_type))
8145     {
8146       eol_type = make_subsidiaries (name);
8147       for (i = 0; i < 3; i++)
8148         {
8149           Lisp_Object this_spec, this_name, this_aliases, this_eol_type;
8150
8151           this_name = AREF (eol_type, i);
8152           this_aliases = Fcons (this_name, Qnil);
8153           this_eol_type = (i == 0 ? Qunix : i == 1 ? Qdos : Qmac);
8154           this_spec = Fmake_vector (make_number (3), attrs);
8155           ASET (this_spec, 1, this_aliases);
8156           ASET (this_spec, 2, this_eol_type);
8157           Fputhash (this_name, this_spec, Vcoding_system_hash_table);
8158           Vcoding_system_list = Fcons (this_name, Vcoding_system_list);
8159           Vcoding_system_alist = Fcons (Fcons (Fsymbol_name (this_name), Qnil),
8160                                         Vcoding_system_alist);
8161         }
8162     }
8163
8164   spec_vec = Fmake_vector (make_number (3), attrs);
8165   ASET (spec_vec, 1, aliases);
8166   ASET (spec_vec, 2, eol_type);
8167
8168   Fputhash (name, spec_vec, Vcoding_system_hash_table);
8169   Vcoding_system_list = Fcons (name, Vcoding_system_list);
8170   Vcoding_system_alist = Fcons (Fcons (Fsymbol_name (name), Qnil),
8171                                 Vcoding_system_alist);
8172
8173   {
8174     int id = coding_categories[category].id;
8175
8176     if (id < 0 || EQ (name, CODING_ID_NAME (id)))
8177       setup_coding_system (name, &coding_categories[category]);
8178   }
8179
8180   return Qnil;
8181
8182  short_args:
8183   return Fsignal (Qwrong_number_of_arguments,
8184                   Fcons (intern ("define-coding-system-internal"),
8185                          make_number (nargs)));
8186 }
8187
8188 /* Fixme: should this record the alias relationships for
8189    diagnostics?  Should it update coding-system-list?  */
8190 DEFUN ("define-coding-system-alias", Fdefine_coding_system_alias,
8191        Sdefine_coding_system_alias, 2, 2, 0,
8192        doc: /* Define ALIAS as an alias for CODING-SYSTEM.  */)
8193      (alias, coding_system)
8194      Lisp_Object alias, coding_system;
8195 {
8196   Lisp_Object spec, aliases, eol_type;
8197
8198   CHECK_SYMBOL (alias);
8199   CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
8200   aliases = AREF (spec, 1);
8201   while (!NILP (XCDR (aliases)))
8202     aliases = XCDR (aliases);
8203   XCDR (aliases) = Fcons (alias, Qnil);
8204
8205   eol_type = AREF (spec, 2);
8206   if (VECTORP (eol_type))
8207     {
8208       Lisp_Object subsidiaries;
8209       int i;
8210
8211       subsidiaries = make_subsidiaries (alias);
8212       for (i = 0; i < 3; i++)
8213         Fdefine_coding_system_alias (AREF (subsidiaries, i),
8214                                      AREF (eol_type, i));
8215
8216       ASET (spec, 2, subsidiaries);
8217     }
8218
8219   Fputhash (alias, spec, Vcoding_system_hash_table);
8220   Vcoding_system_alist = Fcons (Fcons (Fsymbol_name (alias), Qnil),
8221                                 Vcoding_system_alist);
8222
8223   return Qnil;
8224 }
8225
8226 DEFUN ("coding-system-base", Fcoding_system_base, Scoding_system_base,
8227        1, 1, 0,
8228        doc: /* Return the base of CODING-SYSTEM.
8229 Any alias or subsidiary coding system is not a base coding system.  */)
8230   (coding_system)
8231      Lisp_Object coding_system;
8232 {
8233   Lisp_Object spec, attrs;
8234
8235   if (NILP (coding_system))
8236     return (Qno_conversion);
8237   CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
8238   attrs = AREF (spec, 0);
8239   return CODING_ATTR_BASE_NAME (attrs);
8240 }
8241
8242 DEFUN ("coding-system-plist", Fcoding_system_plist, Scoding_system_plist,
8243        1, 1, 0,
8244        doc: "Return the property list of CODING-SYSTEM.")
8245      (coding_system)
8246      Lisp_Object coding_system;
8247 {
8248   Lisp_Object spec, attrs;
8249
8250   if (NILP (coding_system))
8251     coding_system = Qno_conversion;
8252   CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
8253   attrs = AREF (spec, 0);
8254   return CODING_ATTR_PLIST (attrs);
8255 }
8256
8257
8258 DEFUN ("coding-system-aliases", Fcoding_system_aliases, Scoding_system_aliases,
8259        1, 1, 0,
8260        doc: /* Return the list of aliases of CODING-SYSTEM.  */)
8261      (coding_system)
8262      Lisp_Object coding_system;
8263 {
8264   Lisp_Object spec;
8265
8266   if (NILP (coding_system))
8267     coding_system = Qno_conversion;
8268   CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
8269   return AREF (spec, 1);
8270 }
8271
8272 DEFUN ("coding-system-eol-type", Fcoding_system_eol_type,
8273        Scoding_system_eol_type, 1, 1, 0,
8274        doc: /* Return eol-type of CODING-SYSTEM.
8275 An eol-type is integer 0, 1, 2, or a vector of coding systems.
8276
8277 Integer values 0, 1, and 2 indicate a format of end-of-line; LF, CRLF,
8278 and CR respectively.
8279
8280 A vector value indicates that a format of end-of-line should be
8281 detected automatically.  Nth element of the vector is the subsidiary
8282 coding system whose eol-type is N.  */)
8283      (coding_system)
8284      Lisp_Object coding_system;
8285 {
8286   Lisp_Object spec, eol_type;
8287   int n;
8288
8289   if (NILP (coding_system))
8290     coding_system = Qno_conversion;
8291   if (! CODING_SYSTEM_P (coding_system))
8292     return Qnil;
8293   spec = CODING_SYSTEM_SPEC (coding_system);
8294   eol_type = AREF (spec, 2);
8295   if (VECTORP (eol_type))
8296     return Fcopy_sequence (eol_type);
8297   n = EQ (eol_type, Qunix) ? 0 : EQ (eol_type, Qdos) ? 1 : 2;
8298   return make_number (n);
8299 }
8300
8301 #endif /* emacs */
8302
8303 \f
8304 /*** 9. Post-amble ***/
8305
8306 void
8307 init_coding_once ()
8308 {
8309   int i;
8310
8311   for (i = 0; i < coding_category_max; i++)
8312     {
8313       coding_categories[i].id = -1;
8314       coding_priorities[i] = i;
8315     }
8316
8317   /* ISO2022 specific initialize routine.  */
8318   for (i = 0; i < 0x20; i++)
8319     iso_code_class[i] = ISO_control_0;
8320   for (i = 0x21; i < 0x7F; i++)
8321     iso_code_class[i] = ISO_graphic_plane_0;
8322   for (i = 0x80; i < 0xA0; i++)
8323     iso_code_class[i] = ISO_control_1;
8324   for (i = 0xA1; i < 0xFF; i++)
8325     iso_code_class[i] = ISO_graphic_plane_1;
8326   iso_code_class[0x20] = iso_code_class[0x7F] = ISO_0x20_or_0x7F;
8327   iso_code_class[0xA0] = iso_code_class[0xFF] = ISO_0xA0_or_0xFF;
8328   iso_code_class[ISO_CODE_CR] = ISO_carriage_return;
8329   iso_code_class[ISO_CODE_SO] = ISO_shift_out;
8330   iso_code_class[ISO_CODE_SI] = ISO_shift_in;
8331   iso_code_class[ISO_CODE_SS2_7] = ISO_single_shift_2_7;
8332   iso_code_class[ISO_CODE_ESC] = ISO_escape;
8333   iso_code_class[ISO_CODE_SS2] = ISO_single_shift_2;
8334   iso_code_class[ISO_CODE_SS3] = ISO_single_shift_3;
8335   iso_code_class[ISO_CODE_CSI] = ISO_control_sequence_introducer;
8336
8337   inhibit_pre_post_conversion = 0;
8338
8339   for (i = 0; i < 256; i++)
8340     {
8341       emacs_mule_bytes[i] = 1;
8342     }
8343   emacs_mule_bytes[EMACS_MULE_LEADING_CODE_PRIVATE_11] = 3;
8344   emacs_mule_bytes[EMACS_MULE_LEADING_CODE_PRIVATE_12] = 3;
8345   emacs_mule_bytes[EMACS_MULE_LEADING_CODE_PRIVATE_21] = 4;
8346   emacs_mule_bytes[EMACS_MULE_LEADING_CODE_PRIVATE_22] = 4;
8347 }
8348
8349 #ifdef emacs
8350
8351 void
8352 syms_of_coding ()
8353 {
8354   staticpro (&Vcoding_system_hash_table);
8355   Vcoding_system_hash_table = Fmakehash (Qeq);
8356
8357   staticpro (&Vsjis_coding_system);
8358   Vsjis_coding_system = Qnil;
8359
8360   staticpro (&Vbig5_coding_system);
8361   Vbig5_coding_system = Qnil;
8362
8363   staticpro (&Vcode_conversion_work_buf_list);
8364   Vcode_conversion_work_buf_list = Qnil;
8365
8366   staticpro (&Vcode_conversion_reused_work_buf);
8367   Vcode_conversion_reused_work_buf = Qnil;
8368
8369   DEFSYM (Qcharset, "charset");
8370   DEFSYM (Qtarget_idx, "target-idx");
8371   DEFSYM (Qcoding_system_history, "coding-system-history");
8372   Fset (Qcoding_system_history, Qnil);
8373
8374   /* Target FILENAME is the first argument.  */
8375   Fput (Qinsert_file_contents, Qtarget_idx, make_number (0));
8376   /* Target FILENAME is the third argument.  */
8377   Fput (Qwrite_region, Qtarget_idx, make_number (2));
8378
8379   DEFSYM (Qcall_process, "call-process");
8380   /* Target PROGRAM is the first argument.  */
8381   Fput (Qcall_process, Qtarget_idx, make_number (0));
8382
8383   DEFSYM (Qcall_process_region, "call-process-region");
8384   /* Target PROGRAM is the third argument.  */
8385   Fput (Qcall_process_region, Qtarget_idx, make_number (2));
8386
8387   DEFSYM (Qstart_process, "start-process");
8388   /* Target PROGRAM is the third argument.  */
8389   Fput (Qstart_process, Qtarget_idx, make_number (2));
8390
8391   DEFSYM (Qopen_network_stream, "open-network-stream");
8392   /* Target SERVICE is the fourth argument.  */
8393   Fput (Qopen_network_stream, Qtarget_idx, make_number (3));
8394
8395   DEFSYM (Qcoding_system, "coding-system");
8396   DEFSYM (Qcoding_aliases, "coding-aliases");
8397
8398   DEFSYM (Qeol_type, "eol-type");
8399   DEFSYM (Qunix, "unix");
8400   DEFSYM (Qdos, "dos");
8401
8402   DEFSYM (Qbuffer_file_coding_system, "buffer-file-coding-system");
8403   DEFSYM (Qpost_read_conversion, "post-read-conversion");
8404   DEFSYM (Qpre_write_conversion, "pre-write-conversion");
8405   DEFSYM (Qdefault_char, "default-char");
8406   DEFSYM (Qundecided, "undecided");
8407   DEFSYM (Qno_conversion, "no-conversion");
8408   DEFSYM (Qraw_text, "raw-text");
8409
8410   DEFSYM (Qiso_2022, "iso-2022");
8411
8412   DEFSYM (Qutf_8, "utf-8");
8413
8414   DEFSYM (Qutf_16, "utf-16");
8415   DEFSYM (Qsignature, "signature");
8416   DEFSYM (Qendian, "endian");
8417   DEFSYM (Qbig, "big");
8418   DEFSYM (Qlittle, "little");
8419
8420   DEFSYM (Qshift_jis, "shift-jis");
8421   DEFSYM (Qbig5, "big5");
8422
8423   DEFSYM (Qcoding_system_p, "coding-system-p");
8424
8425   DEFSYM (Qcoding_system_error, "coding-system-error");
8426   Fput (Qcoding_system_error, Qerror_conditions,
8427         Fcons (Qcoding_system_error, Fcons (Qerror, Qnil)));
8428   Fput (Qcoding_system_error, Qerror_message,
8429         build_string ("Invalid coding system"));
8430
8431   /* Intern this now in case it isn't already done.
8432      Setting this variable twice is harmless.
8433      But don't staticpro it here--that is done in alloc.c.  */
8434   Qchar_table_extra_slots = intern ("char-table-extra-slots");
8435
8436   DEFSYM (Qtranslation_table, "translation-table");
8437   Fput (Qtranslation_table, Qchar_table_extra_slots, make_number (1));
8438   DEFSYM (Qtranslation_table_id, "translation-table-id");
8439   DEFSYM (Qtranslation_table_for_decode, "translation-table-for-decode");
8440   DEFSYM (Qtranslation_table_for_encode, "translation-table-for-encode");
8441
8442   DEFSYM (Qvalid_codes, "valid-codes");
8443
8444   DEFSYM (Qemacs_mule, "emacs-mule");
8445
8446   Vcoding_category_table
8447     = Fmake_vector (make_number (coding_category_max), Qnil);
8448   staticpro (&Vcoding_category_table);
8449   /* Followings are target of code detection.  */
8450   ASET (Vcoding_category_table, coding_category_iso_7,
8451         intern ("coding-category-iso-7"));
8452   ASET (Vcoding_category_table, coding_category_iso_7_tight,
8453         intern ("coding-category-iso-7-tight"));
8454   ASET (Vcoding_category_table, coding_category_iso_8_1,
8455         intern ("coding-category-iso-8-1"));
8456   ASET (Vcoding_category_table, coding_category_iso_8_2,
8457         intern ("coding-category-iso-8-2"));
8458   ASET (Vcoding_category_table, coding_category_iso_7_else,
8459         intern ("coding-category-iso-7-else"));
8460   ASET (Vcoding_category_table, coding_category_iso_8_else,
8461         intern ("coding-category-iso-8-else"));
8462   ASET (Vcoding_category_table, coding_category_utf_8,
8463         intern ("coding-category-utf-8"));
8464   ASET (Vcoding_category_table, coding_category_utf_16_be,
8465         intern ("coding-category-utf-16-be"));
8466   ASET (Vcoding_category_table, coding_category_utf_16_le,
8467         intern ("coding-category-utf-16-le"));
8468   ASET (Vcoding_category_table, coding_category_utf_16_be_nosig,
8469         intern ("coding-category-utf-16-be-nosig"));
8470   ASET (Vcoding_category_table, coding_category_utf_16_le_nosig,
8471         intern ("coding-category-utf-16-le-nosig"));
8472   ASET (Vcoding_category_table, coding_category_charset,
8473         intern ("coding-category-charset"));
8474   ASET (Vcoding_category_table, coding_category_sjis,
8475         intern ("coding-category-sjis"));
8476   ASET (Vcoding_category_table, coding_category_big5,
8477         intern ("coding-category-big5"));
8478   ASET (Vcoding_category_table, coding_category_ccl,
8479         intern ("coding-category-ccl"));
8480   ASET (Vcoding_category_table, coding_category_emacs_mule,
8481         intern ("coding-category-emacs-mule"));
8482   /* Followings are NOT target of code detection.  */
8483   ASET (Vcoding_category_table, coding_category_raw_text,
8484         intern ("coding-category-raw-text"));
8485   ASET (Vcoding_category_table, coding_category_undecided,
8486         intern ("coding-category-undecided"));
8487
8488   defsubr (&Scoding_system_p);
8489   defsubr (&Sread_coding_system);
8490   defsubr (&Sread_non_nil_coding_system);
8491   defsubr (&Scheck_coding_system);
8492   defsubr (&Sdetect_coding_region);
8493   defsubr (&Sdetect_coding_string);
8494   defsubr (&Sfind_coding_systems_region_internal);
8495   defsubr (&Scheck_coding_systems_region);
8496   defsubr (&Sdecode_coding_region);
8497   defsubr (&Sencode_coding_region);
8498   defsubr (&Sdecode_coding_string);
8499   defsubr (&Sencode_coding_string);
8500   defsubr (&Sdecode_sjis_char);
8501   defsubr (&Sencode_sjis_char);
8502   defsubr (&Sdecode_big5_char);
8503   defsubr (&Sencode_big5_char);
8504   defsubr (&Sset_terminal_coding_system_internal);
8505   defsubr (&Sset_safe_terminal_coding_system_internal);
8506   defsubr (&Sterminal_coding_system);
8507   defsubr (&Sset_keyboard_coding_system_internal);
8508   defsubr (&Skeyboard_coding_system);
8509   defsubr (&Sfind_operation_coding_system);
8510   defsubr (&Sset_coding_system_priority);
8511   defsubr (&Sdefine_coding_system_internal);
8512   defsubr (&Sdefine_coding_system_alias);
8513   defsubr (&Scoding_system_base);
8514   defsubr (&Scoding_system_plist);
8515   defsubr (&Scoding_system_aliases);
8516   defsubr (&Scoding_system_eol_type);
8517   defsubr (&Scoding_system_priority_list);
8518
8519   DEFVAR_LISP ("coding-system-list", &Vcoding_system_list,
8520                doc: /* List of coding systems.
8521
8522 Do not alter the value of this variable manually.  This variable should be
8523 updated by the functions `define-coding-system' and
8524 `define-coding-system-alias'.  */);
8525   Vcoding_system_list = Qnil;
8526
8527   DEFVAR_LISP ("coding-system-alist", &Vcoding_system_alist,
8528                doc: /* Alist of coding system names.
8529 Each element is one element list of coding system name.
8530 This variable is given to `completing-read' as TABLE argument.
8531
8532 Do not alter the value of this variable manually.  This variable should be
8533 updated by the functions `make-coding-system' and
8534 `define-coding-system-alias'.  */);
8535   Vcoding_system_alist = Qnil;
8536
8537   DEFVAR_LISP ("coding-category-list", &Vcoding_category_list,
8538                doc: /* List of coding-categories (symbols) ordered by priority.
8539
8540 On detecting a coding system, Emacs tries code detection algorithms
8541 associated with each coding-category one by one in this order.  When
8542 one algorithm agrees with a byte sequence of source text, the coding
8543 system bound to the corresponding coding-category is selected.  */);
8544   {
8545     int i;
8546
8547     Vcoding_category_list = Qnil;
8548     for (i = coding_category_max - 1; i >= 0; i--)
8549       Vcoding_category_list
8550         = Fcons (XVECTOR (Vcoding_category_table)->contents[i],
8551                  Vcoding_category_list);
8552   }
8553
8554   DEFVAR_LISP ("coding-system-for-read", &Vcoding_system_for_read,
8555                doc: /* Specify the coding system for read operations.
8556 It is useful to bind this variable with `let', but do not set it globally.
8557 If the value is a coding system, it is used for decoding on read operation.
8558 If not, an appropriate element is used from one of the coding system alists:
8559 There are three such tables, `file-coding-system-alist',
8560 `process-coding-system-alist', and `network-coding-system-alist'.  */);
8561   Vcoding_system_for_read = Qnil;
8562
8563   DEFVAR_LISP ("coding-system-for-write", &Vcoding_system_for_write,
8564                doc: /* Specify the coding system for write operations.
8565 Programs bind this variable with `let', but you should not set it globally.
8566 If the value is a coding system, it is used for encoding of output,
8567 when writing it to a file and when sending it to a file or subprocess.
8568
8569 If this does not specify a coding system, an appropriate element
8570 is used from one of the coding system alists:
8571 There are three such tables, `file-coding-system-alist',
8572 `process-coding-system-alist', and `network-coding-system-alist'.
8573 For output to files, if the above procedure does not specify a coding system,
8574 the value of `buffer-file-coding-system' is used.  */);
8575   Vcoding_system_for_write = Qnil;
8576
8577   DEFVAR_LISP ("last-coding-system-used", &Vlast_coding_system_used,
8578                doc: /*
8579 Coding system used in the latest file or process I/O.  */);
8580   Vlast_coding_system_used = Qnil;
8581
8582   DEFVAR_BOOL ("inhibit-eol-conversion", &inhibit_eol_conversion,
8583                doc: /*
8584 *Non-nil means always inhibit code conversion of end-of-line format.
8585 See info node `Coding Systems' and info node `Text and Binary' concerning
8586 such conversion.  */);
8587   inhibit_eol_conversion = 0;
8588
8589   DEFVAR_BOOL ("inherit-process-coding-system", &inherit_process_coding_system,
8590                doc: /*
8591 Non-nil means process buffer inherits coding system of process output.
8592 Bind it to t if the process output is to be treated as if it were a file
8593 read from some filesystem.  */);
8594   inherit_process_coding_system = 0;
8595
8596   DEFVAR_LISP ("file-coding-system-alist", &Vfile_coding_system_alist,
8597                doc: /*
8598 Alist to decide a coding system to use for a file I/O operation.
8599 The format is ((PATTERN . VAL) ...),
8600 where PATTERN is a regular expression matching a file name,
8601 VAL is a coding system, a cons of coding systems, or a function symbol.
8602 If VAL is a coding system, it is used for both decoding and encoding
8603 the file contents.
8604 If VAL is a cons of coding systems, the car part is used for decoding,
8605 and the cdr part is used for encoding.
8606 If VAL is a function symbol, the function must return a coding system
8607 or a cons of coding systems which are used as above.  The function gets
8608 the arguments with which `find-operation-coding-systems' was called.
8609
8610 See also the function `find-operation-coding-system'
8611 and the variable `auto-coding-alist'.  */);
8612   Vfile_coding_system_alist = Qnil;
8613
8614   DEFVAR_LISP ("process-coding-system-alist", &Vprocess_coding_system_alist,
8615                doc: /*
8616 Alist to decide a coding system to use for a process I/O operation.
8617 The format is ((PATTERN . VAL) ...),
8618 where PATTERN is a regular expression matching a program name,
8619 VAL is a coding system, a cons of coding systems, or a function symbol.
8620 If VAL is a coding system, it is used for both decoding what received
8621 from the program and encoding what sent to the program.
8622 If VAL is a cons of coding systems, the car part is used for decoding,
8623 and the cdr part is used for encoding.
8624 If VAL is a function symbol, the function must return a coding system
8625 or a cons of coding systems which are used as above.
8626
8627 See also the function `find-operation-coding-system'.  */);
8628   Vprocess_coding_system_alist = Qnil;
8629
8630   DEFVAR_LISP ("network-coding-system-alist", &Vnetwork_coding_system_alist,
8631                doc: /*
8632 Alist to decide a coding system to use for a network I/O operation.
8633 The format is ((PATTERN . VAL) ...),
8634 where PATTERN is a regular expression matching a network service name
8635 or is a port number to connect to,
8636 VAL is a coding system, a cons of coding systems, or a function symbol.
8637 If VAL is a coding system, it is used for both decoding what received
8638 from the network stream and encoding what sent to the network stream.
8639 If VAL is a cons of coding systems, the car part is used for decoding,
8640 and the cdr part is used for encoding.
8641 If VAL is a function symbol, the function must return a coding system
8642 or a cons of coding systems which are used as above.
8643
8644 See also the function `find-operation-coding-system'.  */);
8645   Vnetwork_coding_system_alist = Qnil;
8646
8647   DEFVAR_LISP ("locale-coding-system", &Vlocale_coding_system,
8648                doc: /* Coding system to use with system messages.
8649 Also used for decoding keyboard input on X Window system.  */);
8650   Vlocale_coding_system = Qnil;
8651
8652   /* The eol mnemonics are reset in startup.el system-dependently.  */
8653   DEFVAR_LISP ("eol-mnemonic-unix", &eol_mnemonic_unix,
8654                doc: /*
8655 *String displayed in mode line for UNIX-like (LF) end-of-line format.  */);
8656   eol_mnemonic_unix = build_string (":");
8657
8658   DEFVAR_LISP ("eol-mnemonic-dos", &eol_mnemonic_dos,
8659                doc: /*
8660 *String displayed in mode line for DOS-like (CRLF) end-of-line format.  */);
8661   eol_mnemonic_dos = build_string ("\\");
8662
8663   DEFVAR_LISP ("eol-mnemonic-mac", &eol_mnemonic_mac,
8664                doc: /*
8665 *String displayed in mode line for MAC-like (CR) end-of-line format.  */);
8666   eol_mnemonic_mac = build_string ("/");
8667
8668   DEFVAR_LISP ("eol-mnemonic-undecided", &eol_mnemonic_undecided,
8669                doc: /*
8670 *String displayed in mode line when end-of-line format is not yet determined.  */);
8671   eol_mnemonic_undecided = build_string (":");
8672
8673   DEFVAR_LISP ("enable-character-translation", &Venable_character_translation,
8674                doc: /*
8675 *Non-nil enables character translation while encoding and decoding.  */);
8676   Venable_character_translation = Qt;
8677
8678   DEFVAR_LISP ("standard-translation-table-for-decode",
8679                &Vstandard_translation_table_for_decode,
8680                doc: /* Table for translating characters while decoding.  */);
8681   Vstandard_translation_table_for_decode = Qnil;
8682
8683   DEFVAR_LISP ("standard-translation-table-for-encode",
8684                &Vstandard_translation_table_for_encode,
8685                doc: /* Table for translating characters while encoding.  */);
8686   Vstandard_translation_table_for_encode = Qnil;
8687
8688   DEFVAR_LISP ("charset-revision-table", &Vcharset_revision_table,
8689                doc: /* Alist of charsets vs revision numbers.
8690 While encoding, if a charset (car part of an element) is found,
8691 designate it with the escape sequence identifying revision (cdr part
8692 of the element).  */);
8693   Vcharset_revision_table = Qnil;
8694
8695   DEFVAR_LISP ("default-process-coding-system",
8696                &Vdefault_process_coding_system,
8697                doc: /* Cons of coding systems used for process I/O by default.
8698 The car part is used for decoding a process output,
8699 the cdr part is used for encoding a text to be sent to a process.  */);
8700   Vdefault_process_coding_system = Qnil;
8701
8702   DEFVAR_LISP ("latin-extra-code-table", &Vlatin_extra_code_table,
8703                doc: /*
8704 Table of extra Latin codes in the range 128..159 (inclusive).
8705 This is a vector of length 256.
8706 If Nth element is non-nil, the existence of code N in a file
8707 \(or output of subprocess) doesn't prevent it to be detected as
8708 a coding system of ISO 2022 variant which has a flag
8709 `accept-latin-extra-code' t (e.g. iso-latin-1) on reading a file
8710 or reading output of a subprocess.
8711 Only 128th through 159th elements has a meaning.  */);
8712   Vlatin_extra_code_table = Fmake_vector (make_number (256), Qnil);
8713
8714   DEFVAR_LISP ("select-safe-coding-system-function",
8715                &Vselect_safe_coding_system_function,
8716                doc: /*
8717 Function to call to select safe coding system for encoding a text.
8718
8719 If set, this function is called to force a user to select a proper
8720 coding system which can encode the text in the case that a default
8721 coding system used in each operation can't encode the text.
8722
8723 The default value is `select-safe-coding-system' (which see).  */);
8724   Vselect_safe_coding_system_function = Qnil;
8725
8726   DEFVAR_BOOL ("inhibit-iso-escape-detection",
8727                &inhibit_iso_escape_detection,
8728                doc: /*
8729 If non-nil, Emacs ignores ISO2022's escape sequence on code detection.
8730
8731 By default, on reading a file, Emacs tries to detect how the text is
8732 encoded.  This code detection is sensitive to escape sequences.  If
8733 the sequence is valid as ISO2022, the code is determined as one of
8734 the ISO2022 encodings, and the file is decoded by the corresponding
8735 coding system (e.g. `iso-2022-7bit').
8736
8737 However, there may be a case that you want to read escape sequences in
8738 a file as is.  In such a case, you can set this variable to non-nil.
8739 Then, as the code detection ignores any escape sequences, no file is
8740 detected as encoded in some ISO2022 encoding.  The result is that all
8741 escape sequences become visible in a buffer.
8742
8743 The default value is nil, and it is strongly recommended not to change
8744 it.  That is because many Emacs Lisp source files that contain
8745 non-ASCII characters are encoded by the coding system `iso-2022-7bit'
8746 in Emacs's distribution, and they won't be decoded correctly on
8747 reading if you suppress escape sequence detection.
8748
8749 The other way to read escape sequences in a file without decoding is
8750 to explicitly specify some coding system that doesn't use ISO2022's
8751 escape sequence (e.g `latin-1') on reading by \\[universal-coding-system-argument].  */);
8752   inhibit_iso_escape_detection = 0;
8753
8754   {
8755     Lisp_Object args[coding_arg_max];
8756     Lisp_Object plist[14];
8757     int i;
8758
8759     for (i = 0; i < coding_arg_max; i++)
8760       args[i] = Qnil;
8761
8762     plist[0] = intern (":name");
8763     plist[1] = args[coding_arg_name] = Qno_conversion;
8764     plist[2] = intern (":mnemonic");
8765     plist[3] = args[coding_arg_mnemonic] = make_number ('=');
8766     plist[4] = intern (":coding-type");
8767     plist[5] = args[coding_arg_coding_type] = Qraw_text;
8768     plist[6] = intern (":ascii-compatible-p");
8769     plist[7] = args[coding_arg_ascii_compatible_p] = Qt;
8770     plist[8] = intern (":default-char");
8771     plist[9] = args[coding_arg_default_char] = make_number (0);
8772     plist[10] = intern (":docstring");
8773     plist[11] = build_string ("Do no conversion.\n\
8774 \n\
8775 When you visit a file with this coding, the file is read into a\n\
8776 unibyte buffer as is, thus each byte of a file is treated as a\n\
8777 character.");
8778     plist[12] = intern (":eol-type");
8779     plist[13] = args[coding_arg_eol_type] = Qunix;
8780     args[coding_arg_plist] = Flist (14, plist);
8781     Fdefine_coding_system_internal (coding_arg_max, args);
8782   }
8783
8784   setup_coding_system (Qno_conversion, &keyboard_coding);
8785   setup_coding_system (Qno_conversion, &terminal_coding);
8786   setup_coding_system (Qno_conversion, &safe_terminal_coding);
8787 }
8788
8789 char *
8790 emacs_strerror (error_number)
8791      int error_number;
8792 {
8793   char *str;
8794
8795   synchronize_system_messages_locale ();
8796   str = strerror (error_number);
8797
8798   if (! NILP (Vlocale_coding_system))
8799     {
8800       Lisp_Object dec = code_convert_string_norecord (build_string (str),
8801                                                       Vlocale_coding_system,
8802                                                       0);
8803       str = (char *) XSTRING (dec)->data;
8804     }
8805
8806   return str;
8807 }
8808
8809 #endif /* emacs */