src/coding.c

   1 /* Coding system handler (conversion, detection, etc).
   2    Copyright (C) 1995, 1997, 1998 Electrotechnical Laboratory, JAPAN.
   3    Licensed to the Free Software Foundation.
   4    Copyright (C) 2001, 2002 Free Software Foundation, Inc.
   5    Copyright (C) 2001, 2002
   6      National Institute of Advanced Industrial Science and Technology (AIST)
   7      Registration Number H13PRO009
   8
   9 This file is part of GNU Emacs.
  10
  11 GNU Emacs is free software; you can redistribute it and/or modify
  12 it under the terms of the GNU General Public License as published by
  13 the Free Software Foundation; either version 2, or (at your option)
  14 any later version.
  15
  16 GNU Emacs is distributed in the hope that it will be useful,
  17 but WITHOUT ANY WARRANTY; without even the implied warranty of
  18 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  19 GNU General Public License for more details.
  20
  21 You should have received a copy of the GNU General Public License
  22 along with GNU Emacs; see the file COPYING.  If not, write to
  23 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
  24 Boston, MA 02111-1307, USA.  */
  25
  26 /*** TABLE OF CONTENTS ***
  27
  28   0. General comments
  29   1. Preamble
  30   2. Emacs' internal format (emacs-utf-8) handlers
  31   3. UTF-8 handlers
  32   4. UTF-16 handlers
  33   5. Charset-base coding systems handlers
  34   6. emacs-mule (old Emacs' internal format) handlers
  35   7. ISO2022 handlers
  36   8. Shift-JIS and BIG5 handlers
  37   9. CCL handlers
  38   10. C library functions
  39   11. Emacs Lisp library functions
  40   12. Postamble
  41
  42 */
  43
  44 /*** 0. General comments ***
  45
  46
  47 CODING SYSTEM
  48
  49   A coding system is an object for an encoding mechanism that contains
  50   information about how to convert byte sequences to character
  51   sequences and vice versa.  When we say "decode", it means converting
  52   a byte sequence of a specific coding system into a character
  53   sequence that is represented by Emacs' internal coding system
  54   `emacs-utf-8', and when we say "encode", it means converting a
  55   character sequence of emacs-utf-8 to a byte sequence of a specific
  56   coding system.
  57
  58   In Emacs Lisp, a coding system is represented by a Lisp symbol.  In
  59   C level, a coding system is represented by a vector of attributes
  60   stored in the hash table Vcharset_hash_table.  The conversion from
  61   coding system symbol to attributes vector is done by looking up
  62   Vcharset_hash_table by the symbol.
  63
  64   Coding systems are classified into the following types depending on
  65   the encoding mechanism.  Here's a brief description of the types.
  66
  67   o UTF-8
  68
  69   o UTF-16
  70
  71   o Charset-base coding system
  72
  73   A coding system defined by one or more (coded) character sets.
  74   Decoding and encoding are done by a code converter defined for each
  75   character set.
  76
  77   o Old Emacs internal format (emacs-mule)
  78
  79   The coding system adopted by old versions of Emacs (20 and 21).
  80
  81   o ISO2022-base coding system
  82
  83   The most famous coding system for multiple character sets.  X's
  84   Compound Text, various EUCs (Extended Unix Code), and coding systems
  85   used in the Internet communication such as ISO-2022-JP are all
  86   variants of ISO2022.
  87
  88   o SJIS (or Shift-JIS or MS-Kanji-Code)
  89
  90   A coding system to encode character sets: ASCII, JISX0201, and
  91   JISX0208.  Widely used for PC's in Japan.  Details are described in
  92   section 8.
  93
  94   o BIG5
  95
  96   A coding system to encode character sets: ASCII and Big5.  Widely
  97   used for Chinese (mainly in Taiwan and Hong Kong).  Details are
  98   described in section 8.  In this file, when we write "big5" (all
  99   lowercase), we mean the coding system, and when we write "Big5"
 100   (capitalized), we mean the character set.
 101
 102   o CCL
 103
 104   If a user wants to decode/encode text encoded in a coding system
 105   not listed above, he can supply a decoder and an encoder for it in
 106   CCL (Code Conversion Language) programs.  Emacs executes the CCL
 107   program while decoding/encoding.
 108
 109   o Raw-text
 110
 111   A coding system for text containing raw eight-bit data.  Emacs
 112   treats each byte of source text as a character (except for
 113   end-of-line conversion).
 114
 115   o No-conversion
 116
 117   Like raw text, but don't do end-of-line conversion.
 118
 119
 120 END-OF-LINE FORMAT
 121
 122   How text end-of-line is encoded depends on operating system.  For
 123   instance, Unix's format is just one byte of LF (line-feed) code,
 124   whereas DOS's format is two-byte sequence of `carriage-return' and
 125   `line-feed' codes.  MacOS's format is usually one byte of
 126   `carriage-return'.
 127
 128   Since text character encoding and end-of-line encoding are
 129   independent, any coding system described above can take any format
 130   of end-of-line (except for no-conversion).
 131
 132 STRUCT CODING_SYSTEM
 133
 134   Before using a coding system for code conversion (i.e. decoding and
 135   encoding), we setup a structure of type `struct coding_system'.
 136   This structure keeps various information about a specific code
 137   conversion (e.g. the location of source and destination data).
 138
 139 */
 140
 141 /* COMMON MACROS */
 142
 143
 144 /*** GENERAL NOTES on `detect_coding_XXX ()' functions ***
 145
 146   These functions check if a byte sequence specified as a source in
 147   CODING conforms to the format of XXX, and update the members of
 148   DETECT_INFO.
 149
 150   Return 1 if the byte sequence conforms to XXX, otherwise return 0.
 151
 152   Below is the template of these functions.  */
 153
 154 #if 0
 155 static int
 156 detect_coding_XXX (coding, detect_info)
 157      struct coding_system *coding;
 158      struct coding_detection_info *detect_info;
 159 {
 160   unsigned char *src = coding->source;
 161   unsigned char *src_end = coding->source + coding->src_bytes;
 162   int multibytep = coding->src_multibyte;
 163   int consumed_chars = 0;
 164   int found = 0;
 165   ...;
 166
 167   while (1)
 168     {
 169       /* Get one byte from the source.  If the souce is exausted, jump
 170          to no_more_source:.  */
 171       ONE_MORE_BYTE (c);
 172
 173       if (! __C_conforms_to_XXX___ (c))
 174         break;
 175       if (! __C_strongly_suggests_XXX__ (c))
 176         found = CATEGORY_MASK_XXX;
 177     }
 178   /* The byte sequence is invalid for XXX.  */
 179   detect_info->rejected |= CATEGORY_MASK_XXX;
 180   return 0;
 181
 182  no_more_source:
 183   /* The source exausted successfully.  */
 184   detect_info->found |= found;
 185   return 1;
 186 }
 187 #endif
 188
 189 /*** GENERAL NOTES on `decode_coding_XXX ()' functions ***
 190
 191   These functions decode a byte sequence specified as a source by
 192   CODING.  The resulting multibyte text goes to a place pointed to by
 193   CODING->charbuf, the length of which should not exceed
 194   CODING->charbuf_size;
 195
 196   These functions set the information of original and decoded texts in
 197   CODING->consumed, CODING->consumed_char, and CODING->charbuf_used.
 198   They also set CODING->result to one of CODING_RESULT_XXX indicating
 199   how the decoding is finished.
 200
 201   Below is the template of these functions.  */
 202
 203 #if 0
 204 static void
 205 decode_coding_XXXX (coding)
 206      struct coding_system *coding;
 207 {
 208   unsigned char *src = coding->source + coding->consumed;
 209   unsigned char *src_end = coding->source + coding->src_bytes;
 210   /* SRC_BASE remembers the start position in source in each loop.
 211      The loop will be exited when there's not enough source code, or
 212      when there's no room in CHARBUF for a decoded character.  */
 213   unsigned char *src_base;
 214   /* A buffer to produce decoded characters.  */
 215   int *charbuf = coding->charbuf;
 216   int *charbuf_end = charbuf + coding->charbuf_size;
 217   int multibytep = coding->src_multibyte;
 218
 219   while (1)
 220     {
 221       src_base = src;
 222       if (charbuf < charbuf_end)
 223         /* No more room to produce a decoded character.  */
 224         break;
 225       ONE_MORE_BYTE (c);
 226       /* Decode it. */
 227     }
 228
 229  no_more_source:
 230   if (src_base < src_end
 231       && coding->mode & CODING_MODE_LAST_BLOCK)
 232     /* If the source ends by partial bytes to construct a character,
 233        treat them as eight-bit raw data.  */
 234     while (src_base < src_end && charbuf < charbuf_end)
 235       *charbuf++ = *src_base++;
 236   /* Remember how many bytes and characters we consumed.  If the
 237      source is multibyte, the bytes and chars are not identical.  */
 238   coding->consumed = coding->consumed_char = src_base - coding->source;
 239   /* Remember how many characters we produced.  */
 240   coding->charbuf_used = charbuf - coding->charbuf;
 241 }
 242 #endif
 243
 244 /*** GENERAL NOTES on `encode_coding_XXX ()' functions ***
 245
 246   These functions encode SRC_BYTES length text at SOURCE of Emacs'
 247   internal multibyte format by CODING.  The resulting byte sequence
 248   goes to a place pointed to by DESTINATION, the length of which
 249   should not exceed DST_BYTES.
 250
 251   These functions set the information of original and encoded texts in
 252   the members produced, produced_char, consumed, and consumed_char of
 253   the structure *CODING.  They also set the member result to one of
 254   CODING_RESULT_XXX indicating how the encoding finished.
 255
 256   DST_BYTES zero means that source area and destination area are
 257   overlapped, which means that we can produce a encoded text until it
 258   reaches at the head of not-yet-encoded source text.
 259
 260   Below is a template of these functions.  */
 261 #if 0
 262 static void
 263 encode_coding_XXX (coding)
 264      struct coding_system *coding;
 265 {
 266   int multibytep = coding->dst_multibyte;
 267   int *charbuf = coding->charbuf;
 268   int *charbuf_end = charbuf->charbuf + coding->charbuf_used;
 269   unsigned char *dst = coding->destination + coding->produced;
 270   unsigned char *dst_end = coding->destination + coding->dst_bytes;
 271   unsigned char *adjusted_dst_end = dst_end - _MAX_BYTES_PRODUCED_IN_LOOP_;
 272   int produced_chars = 0;
 273
 274   for (; charbuf < charbuf_end && dst < adjusted_dst_end; charbuf++)
 275     {
 276       int c = *charbuf;
 277       /* Encode C into DST, and increment DST.  */
 278     }
 279  label_no_more_destination:
 280   /* How many chars and bytes we produced.  */
 281   coding->produced_char += produced_chars;
 282   coding->produced = dst - coding->destination;
 283 }
 284 #endif
 285
 286 \f
 287 /*** 1. Preamble ***/
 288
 289 #include <config.h>
 290 #include <stdio.h>
 291
 292 #include "lisp.h"
 293 #include "buffer.h"
 294 #include "character.h"
 295 #include "charset.h"
 296 #include "ccl.h"
 297 #include "composite.h"
 298 #include "coding.h"
 299 #include "window.h"
 300
 301 Lisp_Object Vcoding_system_hash_table;
 302
 303 Lisp_Object Qcoding_system, Qcoding_aliases, Qeol_type;
 304 Lisp_Object Qunix, Qdos;
 305 extern Lisp_Object Qmac;        /* frame.c */
 306 Lisp_Object Qbuffer_file_coding_system;
 307 Lisp_Object Qpost_read_conversion, Qpre_write_conversion;
 308 Lisp_Object Qdefault_char;
 309 Lisp_Object Qno_conversion, Qundecided;
 310 Lisp_Object Qcharset, Qiso_2022, Qutf_8, Qutf_16, Qshift_jis, Qbig5;
 311 Lisp_Object Qutf_16_be_nosig, Qutf_16_be, Qutf_16_le_nosig, Qutf_16_le;
 312 Lisp_Object Qsignature, Qendian, Qbig, Qlittle;
 313 Lisp_Object Qcoding_system_history;
 314 Lisp_Object Qvalid_codes;
 315
 316 extern Lisp_Object Qinsert_file_contents, Qwrite_region;
 317 Lisp_Object Qcall_process, Qcall_process_region, Qprocess_argument;
 318 Lisp_Object Qstart_process, Qopen_network_stream;
 319 Lisp_Object Qtarget_idx;
 320
 321 Lisp_Object Vselect_safe_coding_system_function;
 322
 323 /* Mnemonic string for each format of end-of-line.  */
 324 Lisp_Object eol_mnemonic_unix, eol_mnemonic_dos, eol_mnemonic_mac;
 325 /* Mnemonic string to indicate format of end-of-line is not yet
 326    decided.  */
 327 Lisp_Object eol_mnemonic_undecided;
 328
 329 #ifdef emacs
 330
 331 Lisp_Object Vcoding_system_list, Vcoding_system_alist;
 332
 333 Lisp_Object Qcoding_system_p, Qcoding_system_error;
 334
 335 /* Coding system emacs-mule and raw-text are for converting only
 336    end-of-line format.  */
 337 Lisp_Object Qemacs_mule, Qraw_text;
 338
 339 /* Coding-systems are handed between Emacs Lisp programs and C internal
 340    routines by the following three variables.  */
 341 /* Coding-system for reading files and receiving data from process.  */
 342 Lisp_Object Vcoding_system_for_read;
 343 /* Coding-system for writing files and sending data to process.  */
 344 Lisp_Object Vcoding_system_for_write;
 345 /* Coding-system actually used in the latest I/O.  */
 346 Lisp_Object Vlast_coding_system_used;
 347
 348 /* A vector of length 256 which contains information about special
 349    Latin codes (especially for dealing with Microsoft codes).  */
 350 Lisp_Object Vlatin_extra_code_table;
 351
 352 /* Flag to inhibit code conversion of end-of-line format.  */
 353 int inhibit_eol_conversion;
 354
 355 /* Flag to inhibit ISO2022 escape sequence detection.  */
 356 int inhibit_iso_escape_detection;
 357
 358 /* Flag to make buffer-file-coding-system inherit from process-coding.  */
 359 int inherit_process_coding_system;
 360
 361 /* Coding system to be used to encode text for terminal display.  */
 362 struct coding_system terminal_coding;
 363
 364 /* Coding system to be used to encode text for terminal display when
 365    terminal coding system is nil.  */
 366 struct coding_system safe_terminal_coding;
 367
 368 /* Coding system of what is sent from terminal keyboard.  */
 369 struct coding_system keyboard_coding;
 370
 371 Lisp_Object Vfile_coding_system_alist;
 372 Lisp_Object Vprocess_coding_system_alist;
 373 Lisp_Object Vnetwork_coding_system_alist;
 374
 375 Lisp_Object Vlocale_coding_system;
 376
 377 #endif /* emacs */
 378
 379 /* Flag to tell if we look up translation table on character code
 380    conversion.  */
 381 Lisp_Object Venable_character_translation;
 382 /* Standard translation table to look up on decoding (reading).  */
 383 Lisp_Object Vstandard_translation_table_for_decode;
 384 /* Standard translation table to look up on encoding (writing).  */
 385 Lisp_Object Vstandard_translation_table_for_encode;
 386
 387 Lisp_Object Qtranslation_table;
 388 Lisp_Object Qtranslation_table_id;
 389 Lisp_Object Qtranslation_table_for_decode;
 390 Lisp_Object Qtranslation_table_for_encode;
 391
 392 /* Alist of charsets vs revision number.  */
 393 static Lisp_Object Vcharset_revision_table;
 394
 395 /* Default coding systems used for process I/O.  */
 396 Lisp_Object Vdefault_process_coding_system;
 397
 398 /* Global flag to tell that we can't call post-read-conversion and
 399    pre-write-conversion functions.  Usually the value is zero, but it
 400    is set to 1 temporarily while such functions are running.  This is
 401    to avoid infinite recursive call.  */
 402 static int inhibit_pre_post_conversion;
 403
 404 /* Two special coding systems.  */
 405 Lisp_Object Vsjis_coding_system;
 406 Lisp_Object Vbig5_coding_system;
 407
 408
 409 static int detect_coding_utf_8 P_ ((struct coding_system *,
 410                                     struct coding_detection_info *info));
 411 static void decode_coding_utf_8 P_ ((struct coding_system *));
 412 static int encode_coding_utf_8 P_ ((struct coding_system *));
 413
 414 static int detect_coding_utf_16 P_ ((struct coding_system *,
 415                                      struct coding_detection_info *info));
 416 static void decode_coding_utf_16 P_ ((struct coding_system *));
 417 static int encode_coding_utf_16 P_ ((struct coding_system *));
 418
 419 static int detect_coding_iso_2022 P_ ((struct coding_system *,
 420                                        struct coding_detection_info *info));
 421 static void decode_coding_iso_2022 P_ ((struct coding_system *));
 422 static int encode_coding_iso_2022 P_ ((struct coding_system *));
 423
 424 static int detect_coding_emacs_mule P_ ((struct coding_system *,
 425                                          struct coding_detection_info *info));
 426 static void decode_coding_emacs_mule P_ ((struct coding_system *));
 427 static int encode_coding_emacs_mule P_ ((struct coding_system *));
 428
 429 static int detect_coding_sjis P_ ((struct coding_system *,
 430                                    struct coding_detection_info *info));
 431 static void decode_coding_sjis P_ ((struct coding_system *));
 432 static int encode_coding_sjis P_ ((struct coding_system *));
 433
 434 static int detect_coding_big5 P_ ((struct coding_system *,
 435                                    struct coding_detection_info *info));
 436 static void decode_coding_big5 P_ ((struct coding_system *));
 437 static int encode_coding_big5 P_ ((struct coding_system *));
 438
 439 static int detect_coding_ccl P_ ((struct coding_system *,
 440                                   struct coding_detection_info *info));
 441 static void decode_coding_ccl P_ ((struct coding_system *));
 442 static int encode_coding_ccl P_ ((struct coding_system *));
 443
 444 static void decode_coding_raw_text P_ ((struct coding_system *));
 445 static int encode_coding_raw_text P_ ((struct coding_system *));
 446
 447
 448 /* ISO2022 section */
 449
 450 #define CODING_ISO_INITIAL(coding, reg)                 \
 451   (XINT (AREF (AREF (CODING_ID_ATTRS ((coding)->id),    \
 452                      coding_attr_iso_initial),          \
 453                reg)))
 454
 455
 456 #define CODING_ISO_REQUEST(coding, charset_id)  \
 457   ((charset_id <= (coding)->max_charset_id      \
 458     ? (coding)->safe_charsets[charset_id]       \
 459     : -1))
 460
 461
 462 #define CODING_ISO_FLAGS(coding)        \
 463   ((coding)->spec.iso_2022.flags)
 464 #define CODING_ISO_DESIGNATION(coding, reg)     \
 465   ((coding)->spec.iso_2022.current_designation[reg])
 466 #define CODING_ISO_INVOCATION(coding, plane)    \
 467   ((coding)->spec.iso_2022.current_invocation[plane])
 468 #define CODING_ISO_SINGLE_SHIFTING(coding)      \
 469   ((coding)->spec.iso_2022.single_shifting)
 470 #define CODING_ISO_BOL(coding)  \
 471   ((coding)->spec.iso_2022.bol)
 472 #define CODING_ISO_INVOKED_CHARSET(coding, plane)       \
 473   CODING_ISO_DESIGNATION ((coding), CODING_ISO_INVOCATION ((coding), (plane)))
 474
 475 /* Control characters of ISO2022.  */
 476                         /* code */      /* function */
 477 #define ISO_CODE_LF     0x0A            /* line-feed */
 478 #define ISO_CODE_CR     0x0D            /* carriage-return */
 479 #define ISO_CODE_SO     0x0E            /* shift-out */
 480 #define ISO_CODE_SI     0x0F            /* shift-in */
 481 #define ISO_CODE_SS2_7  0x19            /* single-shift-2 for 7-bit code */
 482 #define ISO_CODE_ESC    0x1B            /* escape */
 483 #define ISO_CODE_SS2    0x8E            /* single-shift-2 */
 484 #define ISO_CODE_SS3    0x8F            /* single-shift-3 */
 485 #define ISO_CODE_CSI    0x9B            /* control-sequence-introducer */
 486
 487 /* All code (1-byte) of ISO2022 is classified into one of the
 488    followings.  */
 489 enum iso_code_class_type
 490   {
 491     ISO_control_0,              /* Control codes in the range
 492                                    0x00..0x1F and 0x7F, except for the
 493                                    following 5 codes.  */
 494     ISO_carriage_return,        /* ISO_CODE_CR (0x0D) */
 495     ISO_shift_out,              /* ISO_CODE_SO (0x0E) */
 496     ISO_shift_in,               /* ISO_CODE_SI (0x0F) */
 497     ISO_single_shift_2_7,       /* ISO_CODE_SS2_7 (0x19) */
 498     ISO_escape,                 /* ISO_CODE_SO (0x1B) */
 499     ISO_control_1,              /* Control codes in the range
 500                                    0x80..0x9F, except for the
 501                                    following 3 codes.  */
 502     ISO_single_shift_2,         /* ISO_CODE_SS2 (0x8E) */
 503     ISO_single_shift_3,         /* ISO_CODE_SS3 (0x8F) */
 504     ISO_control_sequence_introducer, /* ISO_CODE_CSI (0x9B) */
 505     ISO_0x20_or_0x7F,           /* Codes of the values 0x20 or 0x7F.  */
 506     ISO_graphic_plane_0,        /* Graphic codes in the range 0x21..0x7E.  */
 507     ISO_0xA0_or_0xFF,           /* Codes of the values 0xA0 or 0xFF.  */
 508     ISO_graphic_plane_1         /* Graphic codes in the range 0xA1..0xFE.  */
 509   };
 510
 511 /** The macros CODING_ISO_FLAG_XXX defines a flag bit of the
 512     `iso-flags' attribute of an iso2022 coding system.  */
 513
 514 /* If set, produce long-form designation sequence (e.g. ESC $ ( A)
 515    instead of the correct short-form sequence (e.g. ESC $ A).  */
 516 #define CODING_ISO_FLAG_LONG_FORM       0x0001
 517
 518 /* If set, reset graphic planes and registers at end-of-line to the
 519    initial state.  */
 520 #define CODING_ISO_FLAG_RESET_AT_EOL    0x0002
 521
 522 /* If set, reset graphic planes and registers before any control
 523    characters to the initial state.  */
 524 #define CODING_ISO_FLAG_RESET_AT_CNTL   0x0004
 525
 526 /* If set, encode by 7-bit environment.  */
 527 #define CODING_ISO_FLAG_SEVEN_BITS      0x0008
 528
 529 /* If set, use locking-shift function.  */
 530 #define CODING_ISO_FLAG_LOCKING_SHIFT   0x0010
 531
 532 /* If set, use single-shift function.  Overwrite
 533    CODING_ISO_FLAG_LOCKING_SHIFT.  */
 534 #define CODING_ISO_FLAG_SINGLE_SHIFT    0x0020
 535
 536 /* If set, use designation escape sequence.  */
 537 #define CODING_ISO_FLAG_DESIGNATION     0x0040
 538
 539 /* If set, produce revision number sequence.  */
 540 #define CODING_ISO_FLAG_REVISION        0x0080
 541
 542 /* If set, produce ISO6429's direction specifying sequence.  */
 543 #define CODING_ISO_FLAG_DIRECTION       0x0100
 544
 545 /* If set, assume designation states are reset at beginning of line on
 546    output.  */
 547 #define CODING_ISO_FLAG_INIT_AT_BOL     0x0200
 548
 549 /* If set, designation sequence should be placed at beginning of line
 550    on output.  */
 551 #define CODING_ISO_FLAG_DESIGNATE_AT_BOL 0x0400
 552
 553 /* If set, do not encode unsafe charactes on output.  */
 554 #define CODING_ISO_FLAG_SAFE            0x0800
 555
 556 /* If set, extra latin codes (128..159) are accepted as a valid code
 557    on input.  */
 558 #define CODING_ISO_FLAG_LATIN_EXTRA     0x1000
 559
 560 #define CODING_ISO_FLAG_COMPOSITION     0x2000
 561
 562 #define CODING_ISO_FLAG_EUC_TW_SHIFT    0x4000
 563
 564 #define CODING_ISO_FLAG_USE_ROMAN       0x8000
 565
 566 #define CODING_ISO_FLAG_USE_OLDJIS      0x10000
 567
 568 #define CODING_ISO_FLAG_FULL_SUPPORT    0x100000
 569
 570 /* A character to be produced on output if encoding of the original
 571    character is prohibited by CODING_ISO_FLAG_SAFE.  */
 572 #define CODING_INHIBIT_CHARACTER_SUBSTITUTION  '?'
 573
 574
 575 /* UTF-16 section */
 576 #define CODING_UTF_16_BOM(coding)       \
 577   ((coding)->spec.utf_16.bom)
 578
 579 #define CODING_UTF_16_ENDIAN(coding)    \
 580   ((coding)->spec.utf_16.endian)
 581
 582 #define CODING_UTF_16_SURROGATE(coding) \
 583   ((coding)->spec.utf_16.surrogate)
 584
 585
 586 /* CCL section */
 587 #define CODING_CCL_DECODER(coding)      \
 588   AREF (CODING_ID_ATTRS ((coding)->id), coding_attr_ccl_decoder)
 589 #define CODING_CCL_ENCODER(coding)      \
 590   AREF (CODING_ID_ATTRS ((coding)->id), coding_attr_ccl_encoder)
 591 #define CODING_CCL_VALIDS(coding)                                          \
 592   (XSTRING (AREF (CODING_ID_ATTRS ((coding)->id), coding_attr_ccl_valids)) \
 593    ->data)
 594
 595 /* Index for each coding category in `coding_categories' */
 596
 597 enum coding_category
 598   {
 599     coding_category_iso_7,
 600     coding_category_iso_7_tight,
 601     coding_category_iso_8_1,
 602     coding_category_iso_8_2,
 603     coding_category_iso_7_else,
 604     coding_category_iso_8_else,
 605     coding_category_utf_8,
 606     coding_category_utf_16_auto,
 607     coding_category_utf_16_be,
 608     coding_category_utf_16_le,
 609     coding_category_utf_16_be_nosig,
 610     coding_category_utf_16_le_nosig,
 611     coding_category_charset,
 612     coding_category_sjis,
 613     coding_category_big5,
 614     coding_category_ccl,
 615     coding_category_emacs_mule,
 616     /* All above are targets of code detection.  */
 617     coding_category_raw_text,
 618     coding_category_undecided,
 619     coding_category_max
 620   };
 621
 622 /* Definitions of flag bits used in detect_coding_XXXX.  */
 623 #define CATEGORY_MASK_ISO_7             (1 << coding_category_iso_7)
 624 #define CATEGORY_MASK_ISO_7_TIGHT       (1 << coding_category_iso_7_tight)
 625 #define CATEGORY_MASK_ISO_8_1           (1 << coding_category_iso_8_1)
 626 #define CATEGORY_MASK_ISO_8_2           (1 << coding_category_iso_8_2)
 627 #define CATEGORY_MASK_ISO_7_ELSE        (1 << coding_category_iso_7_else)
 628 #define CATEGORY_MASK_ISO_8_ELSE        (1 << coding_category_iso_8_else)
 629 #define CATEGORY_MASK_UTF_8             (1 << coding_category_utf_8)
 630 #define CATEGORY_MASK_UTF_16_BE         (1 << coding_category_utf_16_be)
 631 #define CATEGORY_MASK_UTF_16_LE         (1 << coding_category_utf_16_le)
 632 #define CATEGORY_MASK_UTF_16_BE_NOSIG   (1 << coding_category_utf_16_be_nosig)
 633 #define CATEGORY_MASK_UTF_16_LE_NOSIG   (1 << coding_category_utf_16_le_nosig)
 634 #define CATEGORY_MASK_CHARSET           (1 << coding_category_charset)
 635 #define CATEGORY_MASK_SJIS              (1 << coding_category_sjis)
 636 #define CATEGORY_MASK_BIG5              (1 << coding_category_big5)
 637 #define CATEGORY_MASK_CCL               (1 << coding_category_ccl)
 638 #define CATEGORY_MASK_EMACS_MULE        (1 << coding_category_emacs_mule)
 639 #define CATEGORY_MASK_RAW_TEXT          (1 << coding_category_raw_text)
 640
 641 /* This value is returned if detect_coding_mask () find nothing other
 642    than ASCII characters.  */
 643 #define CATEGORY_MASK_ANY               \
 644   (CATEGORY_MASK_ISO_7                  \
 645    | CATEGORY_MASK_ISO_7_TIGHT          \
 646    | CATEGORY_MASK_ISO_8_1              \
 647    | CATEGORY_MASK_ISO_8_2              \
 648    | CATEGORY_MASK_ISO_7_ELSE           \
 649    | CATEGORY_MASK_ISO_8_ELSE           \
 650    | CATEGORY_MASK_UTF_8                \
 651    | CATEGORY_MASK_UTF_16_BE            \
 652    | CATEGORY_MASK_UTF_16_LE            \
 653    | CATEGORY_MASK_UTF_16_BE_NOSIG      \
 654    | CATEGORY_MASK_UTF_16_LE_NOSIG      \
 655    | CATEGORY_MASK_CHARSET              \
 656    | CATEGORY_MASK_SJIS                 \
 657    | CATEGORY_MASK_BIG5                 \
 658    | CATEGORY_MASK_CCL                  \
 659    | CATEGORY_MASK_EMACS_MULE)
 660
 661
 662 #define CATEGORY_MASK_ISO_7BIT \
 663   (CATEGORY_MASK_ISO_7 | CATEGORY_MASK_ISO_7_TIGHT)
 664
 665 #define CATEGORY_MASK_ISO_8BIT \
 666   (CATEGORY_MASK_ISO_8_1 | CATEGORY_MASK_ISO_8_2)
 667
 668 #define CATEGORY_MASK_ISO_ELSE \
 669   (CATEGORY_MASK_ISO_7_ELSE | CATEGORY_MASK_ISO_8_ELSE)
 670
 671 #define CATEGORY_MASK_ISO_ESCAPE        \
 672   (CATEGORY_MASK_ISO_7                  \
 673    | CATEGORY_MASK_ISO_7_TIGHT          \
 674    | CATEGORY_MASK_ISO_7_ELSE           \
 675    | CATEGORY_MASK_ISO_8_ELSE)
 676
 677 #define CATEGORY_MASK_ISO       \
 678   (  CATEGORY_MASK_ISO_7BIT     \
 679      | CATEGORY_MASK_ISO_8BIT   \
 680      | CATEGORY_MASK_ISO_ELSE)
 681
 682 #define CATEGORY_MASK_UTF_16            \
 683   (CATEGORY_MASK_UTF_16_BE              \
 684    | CATEGORY_MASK_UTF_16_LE            \
 685    | CATEGORY_MASK_UTF_16_BE_NOSIG      \
 686    | CATEGORY_MASK_UTF_16_LE_NOSIG)
 687
 688
 689 /* List of symbols `coding-category-xxx' ordered by priority.  This
 690    variable is exposed to Emacs Lisp.  */
 691 static Lisp_Object Vcoding_category_list;
 692
 693 /* Table of coding categories (Lisp symbols).  This variable is for
 694    internal use oly.  */
 695 static Lisp_Object Vcoding_category_table;
 696
 697 /* Table of coding-categories ordered by priority.  */
 698 static enum coding_category coding_priorities[coding_category_max];
 699
 700 /* Nth element is a coding context for the coding system bound to the
 701    Nth coding category.  */
 702 static struct coding_system coding_categories[coding_category_max];
 703
 704 static int detected_mask[coding_category_raw_text] =
 705   { CATEGORY_MASK_ISO,
 706     CATEGORY_MASK_ISO,
 707     CATEGORY_MASK_ISO,
 708     CATEGORY_MASK_ISO,
 709     CATEGORY_MASK_ISO,
 710     CATEGORY_MASK_ISO,
 711     CATEGORY_MASK_UTF_8,
 712     CATEGORY_MASK_UTF_16,
 713     CATEGORY_MASK_UTF_16,
 714     CATEGORY_MASK_UTF_16,
 715     CATEGORY_MASK_UTF_16,
 716     CATEGORY_MASK_UTF_16,
 717     CATEGORY_MASK_CHARSET,
 718     CATEGORY_MASK_SJIS,
 719     CATEGORY_MASK_BIG5,
 720     CATEGORY_MASK_CCL,
 721     CATEGORY_MASK_EMACS_MULE
 722   };
 723
 724 /*** Commonly used macros and functions ***/
 725
 726 #ifndef min
 727 #define min(a, b) ((a) < (b) ? (a) : (b))
 728 #endif
 729 #ifndef max
 730 #define max(a, b) ((a) > (b) ? (a) : (b))
 731 #endif
 732
 733 #define CODING_GET_INFO(coding, attrs, eol_type, charset_list)  \
 734   do {                                                          \
 735     attrs = CODING_ID_ATTRS (coding->id);                       \
 736     eol_type = CODING_ID_EOL_TYPE (coding->id);                 \
 737     if (VECTORP (eol_type))                                     \
 738       eol_type = Qunix;                                         \
 739     charset_list = CODING_ATTR_CHARSET_LIST (attrs);            \
 740   } while (0)
 741
 742
 743 /* Safely get one byte from the source text pointed by SRC which ends
 744    at SRC_END, and set C to that byte.  If there are not enough bytes
 745    in the source, it jumps to `no_more_source'.  The caller
 746    should declare and set these variables appropriately in advance:
 747         src, src_end, multibytep
 748 */
 749
 750 #define ONE_MORE_BYTE(c)                                        \
 751   do {                                                          \
 752     if (src == src_end)                                         \
 753       {                                                         \
 754         if (src_base < src)                                     \
 755           coding->result = CODING_RESULT_INSUFFICIENT_SRC;      \
 756         goto no_more_source;                                    \
 757       }                                                         \
 758     c = *src++;                                                 \
 759     if (multibytep && (c & 0x80))                               \
 760       {                                                         \
 761         if ((c & 0xFE) != 0xC0)                                 \
 762           error ("Undecodable char found");                     \
 763         c = ((c & 1) << 6) | *src++;                            \
 764       }                                                         \
 765     consumed_chars++;                                           \
 766   } while (0)
 767
 768
 769 #define ONE_MORE_BYTE_NO_CHECK(c)               \
 770   do {                                          \
 771     c = *src++;                                 \
 772     if (multibytep && (c & 0x80))               \
 773       {                                         \
 774         if ((c & 0xFE) != 0xC0)                 \
 775           error ("Undecodable char found");     \
 776         c = ((c & 1) << 6) | *src++;            \
 777       }                                         \
 778     consumed_chars++;                           \
 779   } while (0)
 780
 781
 782 /* Store a byte C in the place pointed by DST and increment DST to the
 783    next free point, and increment PRODUCED_CHARS.  The caller should
 784    assure that C is 0..127, and declare and set the variable `dst'
 785    appropriately in advance.
 786 */
 787
 788
 789 #define EMIT_ONE_ASCII_BYTE(c)  \
 790   do {                          \
 791     produced_chars++;           \
 792     *dst++ = (c);               \
 793   } while (0)
 794
 795
 796 /* Like EMIT_ONE_ASCII_BYTE byt store two bytes; C1 and C2.  */
 797
 798 #define EMIT_TWO_ASCII_BYTES(c1, c2)    \
 799   do {                                  \
 800     produced_chars += 2;                \
 801     *dst++ = (c1), *dst++ = (c2);       \
 802   } while (0)
 803
 804
 805 /* Store a byte C in the place pointed by DST and increment DST to the
 806    next free point, and increment PRODUCED_CHARS.  If MULTIBYTEP is
 807    nonzero, store in an appropriate multibyte from.  The caller should
 808    declare and set the variables `dst' and `multibytep' appropriately
 809    in advance.  */
 810
 811 #define EMIT_ONE_BYTE(c)                \
 812   do {                                  \
 813     produced_chars++;                   \
 814     if (multibytep)                     \
 815       {                                 \
 816         int ch = (c);                   \
 817         if (ch >= 0x80)                 \
 818           ch = BYTE8_TO_CHAR (ch);      \
 819         CHAR_STRING_ADVANCE (ch, dst);  \
 820       }                                 \
 821     else                                \
 822       *dst++ = (c);                     \
 823   } while (0)
 824
 825
 826 /* Like EMIT_ONE_BYTE, but emit two bytes; C1 and C2.  */
 827
 828 #define EMIT_TWO_BYTES(c1, c2)          \
 829   do {                                  \
 830     produced_chars += 2;                \
 831     if (multibytep)                     \
 832       {                                 \
 833         int ch;                         \
 834                                         \
 835         ch = (c1);                      \
 836         if (ch >= 0x80)                 \
 837           ch = BYTE8_TO_CHAR (ch);      \
 838         CHAR_STRING_ADVANCE (ch, dst);  \
 839         ch = (c2);                      \
 840         if (ch >= 0x80)                 \
 841           ch = BYTE8_TO_CHAR (ch);      \
 842         CHAR_STRING_ADVANCE (ch, dst);  \
 843       }                                 \
 844     else                                \
 845       {                                 \
 846         *dst++ = (c1);                  \
 847         *dst++ = (c2);                  \
 848       }                                 \
 849   } while (0)
 850
 851
 852 #define EMIT_THREE_BYTES(c1, c2, c3)    \
 853   do {                                  \
 854     EMIT_ONE_BYTE (c1);                 \
 855     EMIT_TWO_BYTES (c2, c3);            \
 856   } while (0)
 857
 858
 859 #define EMIT_FOUR_BYTES(c1, c2, c3, c4)         \
 860   do {                                          \
 861     EMIT_TWO_BYTES (c1, c2);                    \
 862     EMIT_TWO_BYTES (c3, c4);                    \
 863   } while (0)
 864
 865
 866 #define CODING_DECODE_CHAR(coding, src, src_base, src_end, charset, code, c) \
 867   do {                                                                       \
 868     charset_map_loaded = 0;                                                  \
 869     c = DECODE_CHAR (charset, code);                                         \
 870     if (charset_map_loaded)                                                  \
 871       {                                                                      \
 872         unsigned char *orig = coding->source;                                \
 873         EMACS_INT offset;                                                    \
 874                                                                              \
 875         coding_set_source (coding);                                          \
 876         offset = coding->source - orig;                                      \
 877         src += offset;                                                       \
 878         src_base += offset;                                                  \
 879         src_end += offset;                                                   \
 880       }                                                                      \
 881   } while (0)
 882
 883
 884 #define ASSURE_DESTINATION(bytes)                               \
 885   do {                                                          \
 886     if (dst + (bytes) >= dst_end)                               \
 887       {                                                         \
 888         int more_bytes = charbuf_end - charbuf + (bytes);       \
 889                                                                 \
 890         dst = alloc_destination (coding, more_bytes, dst);      \
 891         dst_end = coding->destination + coding->dst_bytes;      \
 892       }                                                         \
 893   } while (0)
 894
 895
 896
 897 static void
 898 coding_set_source (coding)
 899      struct coding_system *coding;
 900 {
 901   if (BUFFERP (coding->src_object))
 902     {
 903       if (coding->src_pos < 0)
 904         coding->source = GAP_END_ADDR + coding->src_pos_byte;
 905       else
 906         {
 907           struct buffer *buf = XBUFFER (coding->src_object);
 908           EMACS_INT gpt_byte = BUF_GPT_BYTE (buf);
 909           unsigned char *beg_addr = BUF_BEG_ADDR (buf);
 910
 911           coding->source = beg_addr + coding->src_pos_byte - 1;
 912           if (coding->src_pos_byte >= gpt_byte)
 913             coding->source += BUF_GAP_SIZE (buf);
 914         }
 915     }
 916   else if (STRINGP (coding->src_object))
 917     {
 918       coding->source = (XSTRING (coding->src_object)->data
 919                         + coding->src_pos_byte);
 920     }
 921   else
 922     /* Otherwise, the source is C string and is never relocated
 923        automatically.  Thus we don't have to update anything.  */
 924     ;
 925 }
 926
 927 static void
 928 coding_set_destination (coding)
 929      struct coding_system *coding;
 930 {
 931   if (BUFFERP (coding->dst_object))
 932     {
 933       if (coding->src_pos < 0)
 934         {
 935           coding->destination = BEG_ADDR + coding->dst_pos_byte - 1;
 936           coding->dst_bytes = (GAP_END_ADDR
 937                                - (coding->src_bytes - coding->consumed)
 938                                - coding->destination);
 939         }
 940       else
 941         {
 942           /* We are sure that coding->dst_pos_byte is before the gap
 943              of the buffer. */
 944           coding->destination = (BUF_BEG_ADDR (XBUFFER (coding->dst_object))
 945                                  + coding->dst_pos_byte - 1);
 946           coding->dst_bytes = (BUF_GAP_END_ADDR (XBUFFER (coding->dst_object))
 947                                - coding->destination);
 948         }
 949     }
 950   else
 951     /* Otherwise, the destination is C string and is never relocated
 952        automatically.  Thus we don't have to update anything.  */
 953     ;
 954 }
 955
 956
 957 static void
 958 coding_alloc_by_realloc (coding, bytes)
 959      struct coding_system *coding;
 960      EMACS_INT bytes;
 961 {
 962   coding->destination = (unsigned char *) xrealloc (coding->destination,
 963                                                     coding->dst_bytes + bytes);
 964   coding->dst_bytes += bytes;
 965 }
 966
 967 static void
 968 coding_alloc_by_making_gap (coding, bytes)
 969      struct coding_system *coding;
 970      EMACS_INT bytes;
 971 {
 972   if (BUFFERP (coding->dst_object)
 973       && EQ (coding->src_object, coding->dst_object))
 974     {
 975       EMACS_INT add = coding->src_bytes - coding->consumed;
 976
 977       GAP_SIZE -= add; ZV += add; Z += add; ZV_BYTE += add; Z_BYTE += add;
 978       make_gap (bytes);
 979       GAP_SIZE += add; ZV -= add; Z -= add; ZV_BYTE -= add; Z_BYTE -= add;
 980     }
 981   else
 982     {
 983       Lisp_Object this_buffer;
 984
 985       this_buffer = Fcurrent_buffer ();
 986       set_buffer_internal (XBUFFER (coding->dst_object));
 987       make_gap (bytes);
 988       set_buffer_internal (XBUFFER (this_buffer));
 989     }
 990 }
 991
 992
 993 static unsigned char *
 994 alloc_destination (coding, nbytes, dst)
 995      struct coding_system *coding;
 996      int nbytes;
 997      unsigned char *dst;
 998 {
 999   EMACS_INT offset = dst - coding->destination;
1000
1001   if (BUFFERP (coding->dst_object))
1002     coding_alloc_by_making_gap (coding, nbytes);
1003   else
1004     coding_alloc_by_realloc (coding, nbytes);
1005   coding->result = CODING_RESULT_SUCCESS;
1006   coding_set_destination (coding);
1007   dst = coding->destination + offset;
1008   return dst;
1009 }
1010
1011 /** Macros for annotations.  */
1012
1013 /* Maximum length of annotation data (sum of annotations for
1014    composition and charset).  */
1015 #define MAX_ANNOTATION_LENGTH (5 + (MAX_COMPOSITION_COMPONENTS * 2) - 1 + 5)
1016
1017 /* An annotation data is stored in the array coding->charbuf in this
1018    format:
1019      [ -LENGTH ANNOTATION_MASK FROM TO ... ]
1020    LENGTH is the number of elements in the annotation.
1021    ANNOTATION_MASK is one of CODING_ANNOTATE_XXX_MASK.
1022    FROM and TO specify the range of text annotated.  They are relative
1023    to coding->src_pos (on encoding) or coding->dst_pos (on decoding).
1024
1025    The format of the following elements depend on ANNOTATION_MASK.
1026
1027    In the case of CODING_ANNOTATE_COMPOSITION_MASK, these elements
1028    follows:
1029      ... METHOD [ COMPOSITION-COMPONENTS ... ]
1030    METHOD is one of enum composition_method.
1031    Optionnal COMPOSITION-COMPONENTS are characters and composition
1032    rules.
1033
1034    In the case of CODING_ANNOTATE_CHARSET_MASK, one element CHARSET-ID
1035    follows.  */
1036
1037 #define ADD_ANNOTATION_DATA(buf, len, mask, from, to)   \
1038   do {                                                  \
1039     *(buf)++ = -(len);                                  \
1040     *(buf)++ = (mask);                                  \
1041     *(buf)++ = (from);                                  \
1042     *(buf)++ = (to);                                    \
1043     coding->annotated = 1;                              \
1044   } while (0);
1045
1046 #define ADD_COMPOSITION_DATA(buf, from, to, method)                           \
1047   do {                                                                        \
1048     ADD_ANNOTATION_DATA (buf, 5, CODING_ANNOTATE_COMPOSITION_MASK, from, to); \
1049     *buf++ = method;                                                          \
1050   } while (0)
1051
1052
1053 #define ADD_CHARSET_DATA(buf, from, to, id)                               \
1054   do {                                                                    \
1055     ADD_ANNOTATION_DATA (buf, 5, CODING_ANNOTATE_CHARSET_MASK, from, to); \
1056     *buf++ = id;                                                          \
1057   } while (0)
1058
1059 \f
1060 /*** 2. Emacs' internal format (emacs-utf-8) ***/
1061
1062
1063
1064 \f
1065 /*** 3. UTF-8 ***/
1066
1067 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
1068    Check if a text is encoded in UTF-8.  If it is, return 1, else
1069    return 0.  */
1070
1071 #define UTF_8_1_OCTET_P(c)         ((c) < 0x80)
1072 #define UTF_8_EXTRA_OCTET_P(c)     (((c) & 0xC0) == 0x80)
1073 #define UTF_8_2_OCTET_LEADING_P(c) (((c) & 0xE0) == 0xC0)
1074 #define UTF_8_3_OCTET_LEADING_P(c) (((c) & 0xF0) == 0xE0)
1075 #define UTF_8_4_OCTET_LEADING_P(c) (((c) & 0xF8) == 0xF0)
1076 #define UTF_8_5_OCTET_LEADING_P(c) (((c) & 0xFC) == 0xF8)
1077
1078 static int
1079 detect_coding_utf_8 (coding, detect_info)
1080      struct coding_system *coding;
1081      struct coding_detection_info *detect_info;
1082 {
1083   unsigned char *src = coding->source, *src_base = src;
1084   unsigned char *src_end = coding->source + coding->src_bytes;
1085   int multibytep = coding->src_multibyte;
1086   int consumed_chars = 0;
1087   int found = 0;
1088   int incomplete;
1089
1090   detect_info->checked |= CATEGORY_MASK_UTF_8;
1091   /* A coding system of this category is always ASCII compatible.  */
1092   src += coding->head_ascii;
1093
1094   while (1)
1095     {
1096       int c, c1, c2, c3, c4;
1097
1098       incomplete = 0;
1099       ONE_MORE_BYTE (c);
1100       if (UTF_8_1_OCTET_P (c))
1101         continue;
1102       incomplete = 1;
1103       ONE_MORE_BYTE (c1);
1104       if (! UTF_8_EXTRA_OCTET_P (c1))
1105         break;
1106       if (UTF_8_2_OCTET_LEADING_P (c))
1107         {
1108           found = CATEGORY_MASK_UTF_8;
1109           continue;
1110         }
1111       ONE_MORE_BYTE (c2);
1112       if (! UTF_8_EXTRA_OCTET_P (c2))
1113         break;
1114       if (UTF_8_3_OCTET_LEADING_P (c))
1115         {
1116           found = CATEGORY_MASK_UTF_8;
1117           continue;
1118         }
1119       ONE_MORE_BYTE (c3);
1120       if (! UTF_8_EXTRA_OCTET_P (c3))
1121         break;
1122       if (UTF_8_4_OCTET_LEADING_P (c))
1123         {
1124           found = CATEGORY_MASK_UTF_8;
1125           continue;
1126         }
1127       ONE_MORE_BYTE (c4);
1128       if (! UTF_8_EXTRA_OCTET_P (c4))
1129         break;
1130       if (UTF_8_5_OCTET_LEADING_P (c))
1131         {
1132           found = CATEGORY_MASK_UTF_8;
1133           continue;
1134         }
1135       break;
1136     }
1137   detect_info->rejected |= CATEGORY_MASK_UTF_8;
1138   return 0;
1139
1140  no_more_source:
1141   if (incomplete && coding->mode & CODING_MODE_LAST_BLOCK)
1142     {
1143       detect_info->rejected |= CATEGORY_MASK_UTF_8;
1144       return 0;
1145     }
1146   detect_info->found |= found;
1147   return 1;
1148 }
1149
1150
1151 static void
1152 decode_coding_utf_8 (coding)
1153      struct coding_system *coding;
1154 {
1155   unsigned char *src = coding->source + coding->consumed;
1156   unsigned char *src_end = coding->source + coding->src_bytes;
1157   unsigned char *src_base;
1158   int *charbuf = coding->charbuf;
1159   int *charbuf_end = charbuf + coding->charbuf_size;
1160   int consumed_chars = 0, consumed_chars_base;
1161   int multibytep = coding->src_multibyte;
1162   Lisp_Object attr, eol_type, charset_list;
1163
1164   CODING_GET_INFO (coding, attr, eol_type, charset_list);
1165
1166   while (1)
1167     {
1168       int c, c1, c2, c3, c4, c5;
1169
1170       src_base = src;
1171       consumed_chars_base = consumed_chars;
1172
1173       if (charbuf >= charbuf_end)
1174         break;
1175
1176       ONE_MORE_BYTE (c1);
1177       if (UTF_8_1_OCTET_P(c1))
1178         {
1179           c = c1;
1180           if (c == '\r')
1181             {
1182               if (EQ (eol_type, Qdos))
1183                 {
1184                   if (src == src_end)
1185                     {
1186                       coding->result = CODING_RESULT_INSUFFICIENT_SRC;
1187                       goto no_more_source;
1188                     }
1189                   if (*src == '\n')
1190                     ONE_MORE_BYTE (c);
1191                 }
1192               else if (EQ (eol_type, Qmac))
1193                 c = '\n';
1194             }
1195         }
1196       else
1197         {
1198           ONE_MORE_BYTE (c2);
1199           if (! UTF_8_EXTRA_OCTET_P (c2))
1200             goto invalid_code;
1201           if (UTF_8_2_OCTET_LEADING_P (c1))
1202             {
1203               c = ((c1 & 0x1F) << 6) | (c2 & 0x3F);
1204               /* Reject overlong sequences here and below.  Encoders
1205                  producing them are incorrect, they can be misleading,
1206                  and they mess up read/write invariance.  */
1207               if (c < 128)
1208                 goto invalid_code;
1209             }
1210           else
1211             {
1212               ONE_MORE_BYTE (c3);
1213               if (! UTF_8_EXTRA_OCTET_P (c3))
1214                 goto invalid_code;
1215               if (UTF_8_3_OCTET_LEADING_P (c1))
1216                 {
1217                   c = (((c1 & 0xF) << 12)
1218                        | ((c2 & 0x3F) << 6) | (c3 & 0x3F));
1219                   if (c < 0x800
1220                       || (c >= 0xd800 && c < 0xe000)) /* surrogates (invalid) */
1221                     goto invalid_code;
1222                 }
1223               else
1224                 {
1225                   ONE_MORE_BYTE (c4);
1226                   if (! UTF_8_EXTRA_OCTET_P (c4))
1227                     goto invalid_code;
1228                   if (UTF_8_4_OCTET_LEADING_P (c1))
1229                     {
1230                     c = (((c1 & 0x7) << 18) | ((c2 & 0x3F) << 12)
1231                          | ((c3 & 0x3F) << 6) | (c4 & 0x3F));
1232                     if (c < 0x10000)
1233                       goto invalid_code;
1234                     }
1235                   else
1236                     {
1237                       ONE_MORE_BYTE (c5);
1238                       if (! UTF_8_EXTRA_OCTET_P (c5))
1239                         goto invalid_code;
1240                       if (UTF_8_5_OCTET_LEADING_P (c1))
1241                         {
1242                           c = (((c1 & 0x3) << 24) | ((c2 & 0x3F) << 18)
1243                                | ((c3 & 0x3F) << 12) | ((c4 & 0x3F) << 6)
1244                                | (c5 & 0x3F));
1245                           if ((c > MAX_CHAR) || (c < 0x200000))
1246                             goto invalid_code;
1247                         }
1248                       else
1249                         goto invalid_code;
1250                     }
1251                 }
1252             }
1253         }
1254
1255       *charbuf++ = c;
1256       continue;
1257
1258     invalid_code:
1259       src = src_base;
1260       consumed_chars = consumed_chars_base;
1261       ONE_MORE_BYTE (c);
1262       *charbuf++ = ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
1263       coding->errors++;
1264     }
1265
1266  no_more_source:
1267   coding->consumed_char += consumed_chars_base;
1268   coding->consumed = src_base - coding->source;
1269   coding->charbuf_used = charbuf - coding->charbuf;
1270 }
1271
1272
1273 static int
1274 encode_coding_utf_8 (coding)
1275      struct coding_system *coding;
1276 {
1277   int multibytep = coding->dst_multibyte;
1278   int *charbuf = coding->charbuf;
1279   int *charbuf_end = charbuf + coding->charbuf_used;
1280   unsigned char *dst = coding->destination + coding->produced;
1281   unsigned char *dst_end = coding->destination + coding->dst_bytes;
1282   int produced_chars = 0;
1283   int c;
1284
1285   if (multibytep)
1286     {
1287       int safe_room = MAX_MULTIBYTE_LENGTH * 2;
1288
1289       while (charbuf < charbuf_end)
1290         {
1291           unsigned char str[MAX_MULTIBYTE_LENGTH], *p, *pend = str;
1292
1293           ASSURE_DESTINATION (safe_room);
1294           c = *charbuf++;
1295           if (CHAR_BYTE8_P (c))
1296             {
1297               c = CHAR_TO_BYTE8 (c);
1298               EMIT_ONE_BYTE (c);
1299             }
1300           else
1301             {
1302               CHAR_STRING_ADVANCE (c, pend);
1303               for (p = str; p < pend; p++)
1304                 EMIT_ONE_BYTE (*p);
1305             }
1306         }
1307     }
1308   else
1309     {
1310       int safe_room = MAX_MULTIBYTE_LENGTH;
1311
1312       while (charbuf < charbuf_end)
1313         {
1314           ASSURE_DESTINATION (safe_room);
1315           c = *charbuf++;
1316           dst += CHAR_STRING (c, dst);
1317           produced_chars++;
1318         }
1319     }
1320   coding->result = CODING_RESULT_SUCCESS;
1321   coding->produced_char += produced_chars;
1322   coding->produced = dst - coding->destination;
1323   return 0;
1324 }
1325
1326
1327 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
1328    Check if a text is encoded in one of UTF-16 based coding systems.
1329    If it is, return 1, else return 0.  */
1330
1331 #define UTF_16_HIGH_SURROGATE_P(val) \
1332   (((val) & 0xFC00) == 0xD800)
1333
1334 #define UTF_16_LOW_SURROGATE_P(val) \
1335   (((val) & 0xFC00) == 0xDC00)
1336
1337 #define UTF_16_INVALID_P(val)   \
1338   (((val) == 0xFFFE)            \
1339    || ((val) == 0xFFFF)         \
1340    || UTF_16_LOW_SURROGATE_P (val))
1341
1342
1343 static int
1344 detect_coding_utf_16 (coding, detect_info)
1345      struct coding_system *coding;
1346      struct coding_detection_info *detect_info;
1347 {
1348   unsigned char *src = coding->source, *src_base = src;
1349   unsigned char *src_end = coding->source + coding->src_bytes;
1350   int multibytep = coding->src_multibyte;
1351   int consumed_chars = 0;
1352   int c1, c2;
1353
1354   detect_info->checked |= CATEGORY_MASK_UTF_16;
1355
1356   if (coding->mode & CODING_MODE_LAST_BLOCK
1357       && (coding->src_bytes & 1))
1358     {
1359       detect_info->rejected |= CATEGORY_MASK_UTF_16;
1360       return 0;
1361     }
1362   ONE_MORE_BYTE (c1);
1363   ONE_MORE_BYTE (c2);
1364
1365   if ((c1 == 0xFF) && (c2 == 0xFE))
1366     {
1367       detect_info->found |= CATEGORY_MASK_UTF_16_LE;
1368       detect_info->rejected |= CATEGORY_MASK_UTF_16_BE;
1369     }
1370   else if ((c1 == 0xFE) && (c2 == 0xFF))
1371     {
1372       detect_info->found |= CATEGORY_MASK_UTF_16_BE;
1373       detect_info->rejected |= CATEGORY_MASK_UTF_16_LE;
1374     }
1375  no_more_source:
1376   return 1;
1377 }
1378
1379 static void
1380 decode_coding_utf_16 (coding)
1381      struct coding_system *coding;
1382 {
1383   unsigned char *src = coding->source + coding->consumed;
1384   unsigned char *src_end = coding->source + coding->src_bytes;
1385   unsigned char *src_base;
1386   int *charbuf = coding->charbuf;
1387   int *charbuf_end = charbuf + coding->charbuf_size;
1388   int consumed_chars = 0, consumed_chars_base;
1389   int multibytep = coding->src_multibyte;
1390   enum utf_16_bom_type bom = CODING_UTF_16_BOM (coding);
1391   enum utf_16_endian_type endian = CODING_UTF_16_ENDIAN (coding);
1392   int surrogate = CODING_UTF_16_SURROGATE (coding);
1393   Lisp_Object attr, eol_type, charset_list;
1394
1395   CODING_GET_INFO (coding, attr, eol_type, charset_list);
1396
1397   if (bom != utf_16_without_bom)
1398     {
1399       int c, c1, c2;
1400
1401       src_base = src;
1402       ONE_MORE_BYTE (c1);
1403       ONE_MORE_BYTE (c2);
1404       c = (c1 << 8) | c2;
1405       if (bom == utf_16_with_bom)
1406         {
1407           if (endian == utf_16_big_endian
1408               ? c != 0xFFFE : c != 0xFEFF)
1409             {
1410               /* We are sure that there's enouph room at CHARBUF.  */
1411               *charbuf++ = c1;
1412               *charbuf++ = c2;
1413               coding->errors++;
1414             }
1415         }
1416       else
1417         {
1418           if (c == 0xFFFE)
1419             CODING_UTF_16_ENDIAN (coding)
1420               = endian = utf_16_big_endian;
1421           else if (c == 0xFEFF)
1422             CODING_UTF_16_ENDIAN (coding)
1423               = endian = utf_16_little_endian;
1424           else
1425             {
1426               CODING_UTF_16_ENDIAN (coding)
1427                 = endian = utf_16_big_endian;
1428               src = src_base;
1429             }
1430         }
1431       CODING_UTF_16_BOM (coding) = utf_16_with_bom;
1432     }
1433
1434   while (1)
1435     {
1436       int c, c1, c2;
1437
1438       src_base = src;
1439       consumed_chars_base = consumed_chars;
1440
1441       if (charbuf + 2 >= charbuf_end)
1442         break;
1443
1444       ONE_MORE_BYTE (c1);
1445       ONE_MORE_BYTE (c2);
1446       c = (endian == utf_16_big_endian
1447            ? ((c1 << 8) | c2) : ((c2 << 8) | c1));
1448       if (surrogate)
1449         {
1450           if (! UTF_16_LOW_SURROGATE_P (c))
1451             {
1452               if (endian == utf_16_big_endian)
1453                 c1 = surrogate >> 8, c2 = surrogate & 0xFF;
1454               else
1455                 c1 = surrogate & 0xFF, c2 = surrogate >> 8;
1456               *charbuf++ = c1;
1457               *charbuf++ = c2;
1458               coding->errors++;
1459               if (UTF_16_HIGH_SURROGATE_P (c))
1460                 CODING_UTF_16_SURROGATE (coding) = surrogate = c;
1461               else
1462                 *charbuf++ = c;
1463             }
1464           else
1465             {
1466               c = ((surrogate - 0xD800) << 10) | (c - 0xDC00);
1467               CODING_UTF_16_SURROGATE (coding) = surrogate = 0;
1468               *charbuf++ = c;
1469             }
1470         }
1471       else
1472         {
1473           if (UTF_16_HIGH_SURROGATE_P (c))
1474             CODING_UTF_16_SURROGATE (coding) = surrogate = c;
1475           else
1476             *charbuf++ = c;
1477         }
1478     }
1479
1480  no_more_source:
1481   coding->consumed_char += consumed_chars_base;
1482   coding->consumed = src_base - coding->source;
1483   coding->charbuf_used = charbuf - coding->charbuf;
1484 }
1485
1486 static int
1487 encode_coding_utf_16 (coding)
1488      struct coding_system *coding;
1489 {
1490   int multibytep = coding->dst_multibyte;
1491   int *charbuf = coding->charbuf;
1492   int *charbuf_end = charbuf + coding->charbuf_used;
1493   unsigned char *dst = coding->destination + coding->produced;
1494   unsigned char *dst_end = coding->destination + coding->dst_bytes;
1495   int safe_room = 8;
1496   enum utf_16_bom_type bom = CODING_UTF_16_BOM (coding);
1497   int big_endian = CODING_UTF_16_ENDIAN (coding) == utf_16_big_endian;
1498   int produced_chars = 0;
1499   Lisp_Object attrs, eol_type, charset_list;
1500   int c;
1501
1502   CODING_GET_INFO (coding, attrs, eol_type, charset_list);
1503
1504   if (bom == utf_16_with_bom)
1505     {
1506       ASSURE_DESTINATION (safe_room);
1507       if (big_endian)
1508         EMIT_TWO_BYTES (0xFF, 0xFE);
1509       else
1510         EMIT_TWO_BYTES (0xFE, 0xFF);
1511       CODING_UTF_16_BOM (coding) = utf_16_without_bom;
1512     }
1513
1514   while (charbuf < charbuf_end)
1515     {
1516       ASSURE_DESTINATION (safe_room);
1517       c = *charbuf++;
1518       if (c >= MAX_UNICODE_CHAR)
1519         c = coding->default_char;
1520
1521       if (c < 0x10000)
1522         {
1523           if (big_endian)
1524             EMIT_TWO_BYTES (c >> 8, c & 0xFF);
1525           else
1526             EMIT_TWO_BYTES (c & 0xFF, c >> 8);
1527         }
1528       else
1529         {
1530           int c1, c2;
1531
1532           c -= 0x10000;
1533           c1 = (c >> 10) + 0xD800;
1534           c2 = (c & 0x3FF) + 0xDC00;
1535           if (big_endian)
1536             EMIT_FOUR_BYTES (c1 >> 8, c1 & 0xFF, c2 >> 8, c2 & 0xFF);
1537           else
1538             EMIT_FOUR_BYTES (c1 & 0xFF, c1 >> 8, c2 & 0xFF, c2 >> 8);
1539         }
1540     }
1541   coding->result = CODING_RESULT_SUCCESS;
1542   coding->produced = dst - coding->destination;
1543   coding->produced_char += produced_chars;
1544   return 0;
1545 }
1546
1547 \f
1548 /*** 6. Old Emacs' internal format (emacs-mule) ***/
1549
1550 /* Emacs' internal format for representation of multiple character
1551    sets is a kind of multi-byte encoding, i.e. characters are
1552    represented by variable-length sequences of one-byte codes.
1553
1554    ASCII characters and control characters (e.g. `tab', `newline') are
1555    represented by one-byte sequences which are their ASCII codes, in
1556    the range 0x00 through 0x7F.
1557
1558    8-bit characters of the range 0x80..0x9F are represented by
1559    two-byte sequences of LEADING_CODE_8_BIT_CONTROL and (their 8-bit
1560    code + 0x20).
1561
1562    8-bit characters of the range 0xA0..0xFF are represented by
1563    one-byte sequences which are their 8-bit code.
1564
1565    The other characters are represented by a sequence of `base
1566    leading-code', optional `extended leading-code', and one or two
1567    `position-code's.  The length of the sequence is determined by the
1568    base leading-code.  Leading-code takes the range 0x81 through 0x9D,
1569    whereas extended leading-code and position-code take the range 0xA0
1570    through 0xFF.  See `charset.h' for more details about leading-code
1571    and position-code.
1572
1573    --- CODE RANGE of Emacs' internal format ---
1574    character set        range
1575    -------------        -----
1576    ascii                0x00..0x7F
1577    eight-bit-control    LEADING_CODE_8_BIT_CONTROL + 0xA0..0xBF
1578    eight-bit-graphic    0xA0..0xBF
1579    ELSE                 0x81..0x9D + [0xA0..0xFF]+
1580    ---------------------------------------------
1581
1582    As this is the internal character representation, the format is
1583    usually not used externally (i.e. in a file or in a data sent to a
1584    process).  But, it is possible to have a text externally in this
1585    format (i.e. by encoding by the coding system `emacs-mule').
1586
1587    In that case, a sequence of one-byte codes has a slightly different
1588    form.
1589
1590    At first, all characters in eight-bit-control are represented by
1591    one-byte sequences which are their 8-bit code.
1592
1593    Next, character composition data are represented by the byte
1594    sequence of the form: 0x80 METHOD BYTES CHARS COMPONENT ...,
1595    where,
1596         METHOD is 0xF0 plus one of composition method (enum
1597         composition_method),
1598
1599         BYTES is 0xA0 plus a byte length of this composition data,
1600
1601         CHARS is 0x20 plus a number of characters composed by this
1602         data,
1603
1604         COMPONENTs are characters of multibye form or composition
1605         rules encoded by two-byte of ASCII codes.
1606
1607    In addition, for backward compatibility, the following formats are
1608    also recognized as composition data on decoding.
1609
1610    0x80 MSEQ ...
1611    0x80 0xFF MSEQ RULE MSEQ RULE ... MSEQ
1612
1613    Here,
1614         MSEQ is a multibyte form but in these special format:
1615           ASCII: 0xA0 ASCII_CODE+0x80,
1616           other: LEADING_CODE+0x20 FOLLOWING-BYTE ...,
1617         RULE is a one byte code of the range 0xA0..0xF0 that
1618         represents a composition rule.
1619   */
1620
1621 char emacs_mule_bytes[256];
1622
1623 int
1624 emacs_mule_char (coding, src, nbytes, nchars, id)
1625      struct coding_system *coding;
1626      unsigned char *src;
1627      int *nbytes, *nchars, *id;
1628 {
1629   unsigned char *src_end = coding->source + coding->src_bytes;
1630   int multibytep = coding->src_multibyte;
1631   unsigned char *src_base = src;
1632   struct charset *charset;
1633   unsigned code;
1634   int c;
1635   int consumed_chars = 0;
1636
1637   ONE_MORE_BYTE (c);
1638   switch (emacs_mule_bytes[c])
1639     {
1640     case 2:
1641       if (! (charset = emacs_mule_charset[c]))
1642         goto invalid_code;
1643       ONE_MORE_BYTE (c);
1644       code = c & 0x7F;
1645       break;
1646
1647     case 3:
1648       if (c == EMACS_MULE_LEADING_CODE_PRIVATE_11
1649           || c == EMACS_MULE_LEADING_CODE_PRIVATE_12)
1650         {
1651           ONE_MORE_BYTE (c);
1652           if (! (charset = emacs_mule_charset[c]))
1653             goto invalid_code;
1654           ONE_MORE_BYTE (c);
1655           code = c & 0x7F;
1656         }
1657       else
1658         {
1659           if (! (charset = emacs_mule_charset[c]))
1660             goto invalid_code;
1661           ONE_MORE_BYTE (c);
1662           code = (c & 0x7F) << 8;
1663           ONE_MORE_BYTE (c);
1664           code |= c & 0x7F;
1665         }
1666       break;
1667
1668     case 4:
1669       ONE_MORE_BYTE (c);
1670       if (! (charset = emacs_mule_charset[c]))
1671         goto invalid_code;
1672       ONE_MORE_BYTE (c);
1673       code = (c & 0x7F) << 8;
1674       ONE_MORE_BYTE (c);
1675       code |= c & 0x7F;
1676       break;
1677
1678     case 1:
1679       code = c;
1680       charset = CHARSET_FROM_ID (ASCII_BYTE_P (code)
1681                                  ? charset_ascii : charset_eight_bit);
1682       break;
1683
1684     default:
1685       abort ();
1686     }
1687   c = DECODE_CHAR (charset, code);
1688   if (c < 0)
1689     goto invalid_code;
1690   *nbytes = src - src_base;
1691   *nchars = consumed_chars;
1692   if (id)
1693     *id = charset->id;
1694   return c;
1695
1696  no_more_source:
1697   return -2;
1698
1699  invalid_code:
1700   return -1;
1701 }
1702
1703
1704 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
1705    Check if a text is encoded in `emacs-mule'.  If it is, return 1,
1706    else return 0.  */
1707
1708 static int
1709 detect_coding_emacs_mule (coding, detect_info)
1710      struct coding_system *coding;
1711      struct coding_detection_info *detect_info;
1712 {
1713   unsigned char *src = coding->source, *src_base = src;
1714   unsigned char *src_end = coding->source + coding->src_bytes;
1715   int multibytep = coding->src_multibyte;
1716   int consumed_chars = 0;
1717   int c;
1718   int found = 0;
1719   int incomplete;
1720
1721   detect_info->checked |= CATEGORY_MASK_EMACS_MULE;
1722   /* A coding system of this category is always ASCII compatible.  */
1723   src += coding->head_ascii;
1724
1725   while (1)
1726     {
1727       incomplete = 0;
1728       ONE_MORE_BYTE (c);
1729       incomplete = 1;
1730
1731       if (c == 0x80)
1732         {
1733           /* Perhaps the start of composite character.  We simple skip
1734              it because analyzing it is too heavy for detecting.  But,
1735              at least, we check that the composite character
1736              constitues of more than 4 bytes.  */
1737           unsigned char *src_base;
1738
1739         repeat:
1740           src_base = src;
1741           do
1742             {
1743               ONE_MORE_BYTE (c);
1744             }
1745           while (c >= 0xA0);
1746
1747           if (src - src_base <= 4)
1748             break;
1749           found = CATEGORY_MASK_EMACS_MULE;
1750           if (c == 0x80)
1751             goto repeat;
1752         }
1753
1754       if (c < 0x80)
1755         {
1756           if (c < 0x20
1757               && (c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO))
1758             break;
1759         }
1760       else
1761         {
1762           unsigned char *src_base = src - 1;
1763
1764           do
1765             {
1766               ONE_MORE_BYTE (c);
1767             }
1768           while (c >= 0xA0);
1769           if (src - src_base != emacs_mule_bytes[*src_base])
1770             break;
1771           found = CATEGORY_MASK_EMACS_MULE;
1772         }
1773     }
1774   detect_info->rejected |= CATEGORY_MASK_EMACS_MULE;
1775   return 0;
1776
1777  no_more_source:
1778   if (incomplete && coding->mode & CODING_MODE_LAST_BLOCK)
1779     {
1780       detect_info->rejected |= CATEGORY_MASK_EMACS_MULE;
1781       return 0;
1782     }
1783   detect_info->found |= found;
1784   return 1;
1785 }
1786
1787
1788 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".  */
1789
1790 /* Decode a character represented as a component of composition
1791    sequence of Emacs 20/21 style at SRC.  Set C to that character and
1792    update SRC to the head of next character (or an encoded composition
1793    rule).  If SRC doesn't points a composition component, set C to -1.
1794    If SRC points an invalid byte sequence, global exit by a return
1795    value 0.  */
1796
1797 #define DECODE_EMACS_MULE_COMPOSITION_CHAR(buf)                 \
1798   if (1)                                                        \
1799     {                                                           \
1800       int c;                                                    \
1801       int nbytes, nchars;                                       \
1802                                                                 \
1803       if (src == src_end)                                       \
1804         break;                                                  \
1805       c = emacs_mule_char (coding, src, &nbytes, &nchars, NULL);\
1806       if (c < 0)                                                \
1807         {                                                       \
1808           if (c == -2)                                          \
1809             break;                                              \
1810           goto invalid_code;                                    \
1811         }                                                       \
1812       *buf++ = c;                                               \
1813       src += nbytes;                                            \
1814       consumed_chars += nchars;                                 \
1815     }                                                           \
1816   else
1817
1818
1819 /* Decode a composition rule represented as a component of composition
1820    sequence of Emacs 20 style at SRC.  Store the decoded rule in *BUF,
1821    and increment BUF.  If SRC points an invalid byte sequence, set C
1822    to -1.  */
1823
1824 #define DECODE_EMACS_MULE_COMPOSITION_RULE_20(buf)      \
1825   do {                                                  \
1826     int c, gref, nref;                                  \
1827                                                         \
1828     if (src >= src_end)                                 \
1829       goto invalid_code;                                \
1830     ONE_MORE_BYTE_NO_CHECK (c);                         \
1831     c -= 0x20;                                          \
1832     if (c < 0 || c >= 81)                               \
1833       goto invalid_code;                                \
1834                                                         \
1835     gref = c / 9, nref = c % 9;                         \
1836     *buf++ = COMPOSITION_ENCODE_RULE (gref, nref);      \
1837   } while (0)
1838
1839
1840 /* Decode a composition rule represented as a component of composition
1841    sequence of Emacs 21 style at SRC.  Store the decoded rule in *BUF,
1842    and increment BUF.  If SRC points an invalid byte sequence, set C
1843    to -1.  */
1844
1845 #define DECODE_EMACS_MULE_COMPOSITION_RULE_21(buf)      \
1846   do {                                                  \
1847     int gref, nref;                                     \
1848                                                         \
1849     if (src + 1>= src_end)                              \
1850       goto invalid_code;                                \
1851     ONE_MORE_BYTE_NO_CHECK (gref);                      \
1852     gref -= 0x20;                                       \
1853     ONE_MORE_BYTE_NO_CHECK (nref);                      \
1854     nref -= 0x20;                                       \
1855     if (gref < 0 || gref >= 81                          \
1856         || nref < 0 || nref >= 81)                      \
1857       goto invalid_code;                                \
1858     *buf++ = COMPOSITION_ENCODE_RULE (gref, nref);      \
1859   } while (0)
1860
1861
1862 #define DECODE_EMACS_MULE_21_COMPOSITION(c)                             \
1863   do {                                                                  \
1864     /* Emacs 21 style format.  The first three bytes at SRC are         \
1865        (METHOD - 0xF2), (BYTES - 0xA0), (CHARS - 0xA0), where BYTES is  \
1866        the byte length of this composition information, CHARS is the    \
1867        number of characters composed by this composition.  */           \
1868     enum composition_method method = c - 0xF2;                          \
1869     int *charbuf_base = charbuf;                                        \
1870     int from, to;                                                       \
1871     int consumed_chars_limit;                                           \
1872     int nbytes, nchars;                                                 \
1873                                                                         \
1874     ONE_MORE_BYTE (c);                                                  \
1875     nbytes = c - 0xA0;                                                  \
1876     if (nbytes < 3)                                                     \
1877       goto invalid_code;                                                \
1878     ONE_MORE_BYTE (c);                                                  \
1879     nchars = c - 0xA0;                                                  \
1880     from = coding->produced + char_offset;                              \
1881     to = from + nchars;                                                 \
1882     ADD_COMPOSITION_DATA (charbuf, from, to, method);                   \
1883     consumed_chars_limit = consumed_chars_base + nbytes;                \
1884     if (method != COMPOSITION_RELATIVE)                                 \
1885       {                                                                 \
1886         int i = 0;                                                      \
1887         while (consumed_chars < consumed_chars_limit)                   \
1888           {                                                             \
1889             if (i % 2 && method != COMPOSITION_WITH_ALTCHARS)           \
1890               DECODE_EMACS_MULE_COMPOSITION_RULE_21 (charbuf);          \
1891             else                                                        \
1892               DECODE_EMACS_MULE_COMPOSITION_CHAR (charbuf);             \
1893             i++;                                                        \
1894           }                                                             \
1895         if (consumed_chars < consumed_chars_limit)                      \
1896           goto invalid_code;                                            \
1897         charbuf_base[0] -= i;                                           \
1898       }                                                                 \
1899   } while (0)
1900
1901
1902 #define DECODE_EMACS_MULE_20_RELATIVE_COMPOSITION(c)            \
1903   do {                                                          \
1904     /* Emacs 20 style format for relative composition.  */      \
1905     /* Store multibyte form of characters to be composed.  */   \
1906     enum composition_method method = COMPOSITION_RELATIVE;      \
1907     int components[MAX_COMPOSITION_COMPONENTS * 2 - 1];         \
1908     int *buf = components;                                      \
1909     int i, j;                                                   \
1910     int from, to;                                               \
1911                                                                 \
1912     src = src_base;                                             \
1913     ONE_MORE_BYTE (c);          /* skip 0x80 */                 \
1914     for (i = 0; i < MAX_COMPOSITION_COMPONENTS; i++)            \
1915       DECODE_EMACS_MULE_COMPOSITION_CHAR (buf);                 \
1916     if (i < 2)                                                  \
1917       goto invalid_code;                                        \
1918     from = coding->produced_char + char_offset;                 \
1919     to = from + i;                                              \
1920     ADD_COMPOSITION_DATA (charbuf, from, to, method);           \
1921     for (j = 0; j < i; j++)                                     \
1922       *charbuf++ = components[j];                               \
1923   } while (0)
1924
1925
1926 #define DECODE_EMACS_MULE_20_RULEBASE_COMPOSITION(c)            \
1927   do {                                                          \
1928     /* Emacs 20 style format for rule-base composition.  */     \
1929     /* Store multibyte form of characters to be composed.  */   \
1930     enum composition_method method = COMPOSITION_WITH_RULE;     \
1931     int components[MAX_COMPOSITION_COMPONENTS * 2 - 1];         \
1932     int *buf = components;                                      \
1933     int i, j;                                                   \
1934     int from, to;                                               \
1935                                                                 \
1936     DECODE_EMACS_MULE_COMPOSITION_CHAR (buf);                   \
1937     for (i = 0; i < MAX_COMPOSITION_COMPONENTS; i++)            \
1938       {                                                         \
1939         DECODE_EMACS_MULE_COMPOSITION_RULE_20 (buf);            \
1940         DECODE_EMACS_MULE_COMPOSITION_CHAR (buf);               \
1941       }                                                         \
1942     if (i < 1 || (buf - components) % 2 == 0)                   \
1943       goto invalid_code;                                        \
1944     if (charbuf + i + (i / 2) + 1 < charbuf_end)                \
1945       goto no_more_source;                                      \
1946     from = coding->produced_char + char_offset;                 \
1947     to = from + i;                                              \
1948     ADD_COMPOSITION_DATA (buf, from, to, method);               \
1949     for (j = 0; j < i; j++)                                     \
1950       *charbuf++ = components[j];                               \
1951     for (j = 0; j < i; j += 2)                                  \
1952       *charbuf++ = components[j];                               \
1953   } while (0)
1954
1955
1956 static void
1957 decode_coding_emacs_mule (coding)
1958      struct coding_system *coding;
1959 {
1960   unsigned char *src = coding->source + coding->consumed;
1961   unsigned char *src_end = coding->source + coding->src_bytes;
1962   unsigned char *src_base;
1963   int *charbuf = coding->charbuf;
1964   int *charbuf_end = charbuf + coding->charbuf_size - MAX_ANNOTATION_LENGTH;
1965   int consumed_chars = 0, consumed_chars_base;
1966   int multibytep = coding->src_multibyte;
1967   Lisp_Object attrs, eol_type, charset_list;
1968   int char_offset = coding->produced_char;
1969   int last_offset = char_offset;
1970   int last_id = charset_ascii;
1971
1972   CODING_GET_INFO (coding, attrs, eol_type, charset_list);
1973
1974   while (1)
1975     {
1976       int c;
1977
1978       src_base = src;
1979       consumed_chars_base = consumed_chars;
1980
1981       if (charbuf >= charbuf_end)
1982         break;
1983
1984       ONE_MORE_BYTE (c);
1985
1986       if (c < 0x80)
1987         {
1988           if (c == '\r')
1989             {
1990               if (EQ (eol_type, Qdos))
1991                 {
1992                   if (src == src_end)
1993                     {
1994                       coding->result = CODING_RESULT_INSUFFICIENT_SRC;
1995                       goto no_more_source;
1996                     }
1997                   if (*src == '\n')
1998                     ONE_MORE_BYTE (c);
1999                 }
2000               else if (EQ (eol_type, Qmac))
2001                 c = '\n';
2002             }
2003           *charbuf++ = c;
2004           char_offset++;
2005         }
2006       else if (c == 0x80)
2007         {
2008           ONE_MORE_BYTE (c);
2009           if (c - 0xF2 >= COMPOSITION_RELATIVE
2010               && c - 0xF2 <= COMPOSITION_WITH_RULE_ALTCHARS)
2011             DECODE_EMACS_MULE_21_COMPOSITION (c);
2012           else if (c < 0xC0)
2013             DECODE_EMACS_MULE_20_RELATIVE_COMPOSITION (c);
2014           else if (c == 0xFF)
2015             DECODE_EMACS_MULE_20_RULEBASE_COMPOSITION (c);
2016           else
2017             goto invalid_code;
2018         }
2019       else if (c < 0xA0 && emacs_mule_bytes[c] > 1)
2020         {
2021           int nbytes, nchars;
2022           int id;
2023
2024           src = src_base;
2025           consumed_chars = consumed_chars_base;
2026           c = emacs_mule_char (coding, src, &nbytes, &nchars, &id);
2027           if (c < 0)
2028             {
2029               if (c == -2)
2030                 break;
2031               goto invalid_code;
2032             }
2033           if (last_id != id)
2034             {
2035               if (last_id != charset_ascii)
2036                 ADD_CHARSET_DATA (charbuf, last_offset, char_offset, last_id);
2037               last_id = id;
2038               last_offset = char_offset;
2039             }
2040           *charbuf++ = c;
2041           src += nbytes;
2042           consumed_chars += nchars;
2043           char_offset++;
2044         }
2045       continue;
2046
2047     invalid_code:
2048       src = src_base;
2049       consumed_chars = consumed_chars_base;
2050       ONE_MORE_BYTE (c);
2051       *charbuf++ = ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
2052       char_offset++;
2053       coding->errors++;
2054     }
2055
2056  no_more_source:
2057   if (last_id != charset_ascii)
2058     ADD_CHARSET_DATA (charbuf, last_offset, char_offset, last_id);
2059   coding->consumed_char += consumed_chars_base;
2060   coding->consumed = src_base - coding->source;
2061   coding->charbuf_used = charbuf - coding->charbuf;
2062 }
2063
2064
2065 #define EMACS_MULE_LEADING_CODES(id, codes)     \
2066   do {                                          \
2067     if (id < 0xA0)                              \
2068       codes[0] = id, codes[1] = 0;              \
2069     else if (id < 0xE0)                         \
2070       codes[0] = 0x9A, codes[1] = id;           \
2071     else if (id < 0xF0)                         \
2072       codes[0] = 0x9B, codes[1] = id;           \
2073     else if (id < 0xF5)                         \
2074       codes[0] = 0x9C, codes[1] = id;           \
2075     else                                        \
2076       codes[0] = 0x9D, codes[1] = id;           \
2077   } while (0);
2078
2079
2080 static int
2081 encode_coding_emacs_mule (coding)
2082      struct coding_system *coding;
2083 {
2084   int multibytep = coding->dst_multibyte;
2085   int *charbuf = coding->charbuf;
2086   int *charbuf_end = charbuf + coding->charbuf_used;
2087   unsigned char *dst = coding->destination + coding->produced;
2088   unsigned char *dst_end = coding->destination + coding->dst_bytes;
2089   int safe_room = 8;
2090   int produced_chars = 0;
2091   Lisp_Object attrs, eol_type, charset_list;
2092   int c;
2093   int preferred_charset_id = -1;
2094
2095   CODING_GET_INFO (coding, attrs, eol_type, charset_list);
2096
2097   while (charbuf < charbuf_end)
2098     {
2099       ASSURE_DESTINATION (safe_room);
2100       c = *charbuf++;
2101
2102       if (c < 0)
2103         {
2104           /* Handle an annotation.  */
2105           switch (*charbuf)
2106             {
2107             case CODING_ANNOTATE_COMPOSITION_MASK:
2108               /* Not yet implemented.  */
2109               break;
2110             case CODING_ANNOTATE_CHARSET_MASK:
2111               preferred_charset_id = charbuf[3];
2112               if (preferred_charset_id >= 0
2113                   && NILP (Fmemq (make_number (preferred_charset_id),
2114                                   charset_list)))
2115                 preferred_charset_id = -1;
2116               break;
2117             default:
2118               abort ();
2119             }
2120           charbuf += -c - 1;
2121           continue;
2122         }
2123
2124       if (ASCII_CHAR_P (c))
2125         EMIT_ONE_ASCII_BYTE (c);
2126       else if (CHAR_BYTE8_P (c))
2127         {
2128           c = CHAR_TO_BYTE8 (c);
2129           EMIT_ONE_BYTE (c);
2130         }
2131       else
2132         {
2133           struct charset *charset;
2134           unsigned code;
2135           int dimension;
2136           int emacs_mule_id;
2137           unsigned char leading_codes[2];
2138
2139           if (preferred_charset_id >= 0)
2140             {
2141               charset = CHARSET_FROM_ID (preferred_charset_id);
2142               if (! CHAR_CHARSET_P (c, charset))
2143                 charset = char_charset (c, charset_list, NULL);
2144             }
2145           else
2146             charset = char_charset (c, charset_list, &code);
2147           if (! charset)
2148             {
2149               c = coding->default_char;
2150               if (ASCII_CHAR_P (c))
2151                 {
2152                   EMIT_ONE_ASCII_BYTE (c);
2153                   continue;
2154                 }
2155               charset = char_charset (c, charset_list, &code);
2156             }
2157           dimension = CHARSET_DIMENSION (charset);
2158           emacs_mule_id = CHARSET_EMACS_MULE_ID (charset);
2159           EMACS_MULE_LEADING_CODES (emacs_mule_id, leading_codes);
2160           EMIT_ONE_BYTE (leading_codes[0]);
2161           if (leading_codes[1])
2162             EMIT_ONE_BYTE (leading_codes[1]);
2163           if (dimension == 1)
2164             EMIT_ONE_BYTE (code);
2165           else
2166             {
2167               EMIT_ONE_BYTE (code >> 8);
2168               EMIT_ONE_BYTE (code & 0xFF);
2169             }
2170         }
2171     }
2172   coding->result = CODING_RESULT_SUCCESS;
2173   coding->produced_char += produced_chars;
2174   coding->produced = dst - coding->destination;
2175   return 0;
2176 }
2177
2178 \f
2179 /*** 7. ISO2022 handlers ***/
2180
2181 /* The following note describes the coding system ISO2022 briefly.
2182    Since the intention of this note is to help understand the
2183    functions in this file, some parts are NOT ACCURATE or are OVERLY
2184    SIMPLIFIED.  For thorough understanding, please refer to the
2185    original document of ISO2022.  This is equivalent to the standard
2186    ECMA-35, obtainable from <URL:http://www.ecma.ch/> (*).
2187
2188    ISO2022 provides many mechanisms to encode several character sets
2189    in 7-bit and 8-bit environments.  For 7-bit environments, all text
2190    is encoded using bytes less than 128.  This may make the encoded
2191    text a little bit longer, but the text passes more easily through
2192    several types of gateway, some of which strip off the MSB (Most
2193    Significant Bit).
2194
2195    There are two kinds of character sets: control character sets and
2196    graphic character sets.  The former contain control characters such
2197    as `newline' and `escape' to provide control functions (control
2198    functions are also provided by escape sequences).  The latter
2199    contain graphic characters such as 'A' and '-'.  Emacs recognizes
2200    two control character sets and many graphic character sets.
2201
2202    Graphic character sets are classified into one of the following
2203    four classes, according to the number of bytes (DIMENSION) and
2204    number of characters in one dimension (CHARS) of the set:
2205    - DIMENSION1_CHARS94
2206    - DIMENSION1_CHARS96
2207    - DIMENSION2_CHARS94
2208    - DIMENSION2_CHARS96
2209
2210    In addition, each character set is assigned an identification tag,
2211    unique for each set, called the "final character" (denoted as <F>
2212    hereafter).  The <F> of each character set is decided by ECMA(*)
2213    when it is registered in ISO.  The code range of <F> is 0x30..0x7F
2214    (0x30..0x3F are for private use only).
2215
2216    Note (*): ECMA = European Computer Manufacturers Association
2217
2218    Here are examples of graphic character sets [NAME(<F>)]:
2219         o DIMENSION1_CHARS94 -- ASCII('B'), right-half-of-JISX0201('I'), ...
2220         o DIMENSION1_CHARS96 -- right-half-of-ISO8859-1('A'), ...
2221         o DIMENSION2_CHARS94 -- GB2312('A'), JISX0208('B'), ...
2222         o DIMENSION2_CHARS96 -- none for the moment
2223
2224    A code area (1 byte=8 bits) is divided into 4 areas, C0, GL, C1, and GR.
2225         C0 [0x00..0x1F] -- control character plane 0
2226         GL [0x20..0x7F] -- graphic character plane 0
2227         C1 [0x80..0x9F] -- control character plane 1
2228         GR [0xA0..0xFF] -- graphic character plane 1
2229
2230    A control character set is directly designated and invoked to C0 or
2231    C1 by an escape sequence.  The most common case is that:
2232    - ISO646's  control character set is designated/invoked to C0, and
2233    - ISO6429's control character set is designated/invoked to C1,
2234    and usually these designations/invocations are omitted in encoded
2235    text.  In a 7-bit environment, only C0 can be used, and a control
2236    character for C1 is encoded by an appropriate escape sequence to
2237    fit into the environment.  All control characters for C1 are
2238    defined to have corresponding escape sequences.
2239
2240    A graphic character set is at first designated to one of four
2241    graphic registers (G0 through G3), then these graphic registers are
2242    invoked to GL or GR.  These designations and invocations can be
2243    done independently.  The most common case is that G0 is invoked to
2244    GL, G1 is invoked to GR, and ASCII is designated to G0.  Usually
2245    these invocations and designations are omitted in encoded text.
2246    In a 7-bit environment, only GL can be used.
2247
2248    When a graphic character set of CHARS94 is invoked to GL, codes
2249    0x20 and 0x7F of the GL area work as control characters SPACE and
2250    DEL respectively, and codes 0xA0 and 0xFF of the GR area should not
2251    be used.
2252
2253    There are two ways of invocation: locking-shift and single-shift.
2254    With locking-shift, the invocation lasts until the next different
2255    invocation, whereas with single-shift, the invocation affects the
2256    following character only and doesn't affect the locking-shift
2257    state.  Invocations are done by the following control characters or
2258    escape sequences:
2259
2260    ----------------------------------------------------------------------
2261    abbrev  function                  cntrl escape seq   description
2262    ----------------------------------------------------------------------
2263    SI/LS0  (shift-in)                0x0F  none         invoke G0 into GL
2264    SO/LS1  (shift-out)               0x0E  none         invoke G1 into GL
2265    LS2     (locking-shift-2)         none  ESC 'n'      invoke G2 into GL
2266    LS3     (locking-shift-3)         none  ESC 'o'      invoke G3 into GL
2267    LS1R    (locking-shift-1 right)   none  ESC '~'      invoke G1 into GR (*)
2268    LS2R    (locking-shift-2 right)   none  ESC '}'      invoke G2 into GR (*)
2269    LS3R    (locking-shift 3 right)   none  ESC '|'      invoke G3 into GR (*)
2270    SS2     (single-shift-2)          0x8E  ESC 'N'      invoke G2 for one char
2271    SS3     (single-shift-3)          0x8F  ESC 'O'      invoke G3 for one char
2272    ----------------------------------------------------------------------
2273    (*) These are not used by any known coding system.
2274
2275    Control characters for these functions are defined by macros
2276    ISO_CODE_XXX in `coding.h'.
2277
2278    Designations are done by the following escape sequences:
2279    ----------------------------------------------------------------------
2280    escape sequence      description
2281    ----------------------------------------------------------------------
2282    ESC '(' <F>          designate DIMENSION1_CHARS94<F> to G0
2283    ESC ')' <F>          designate DIMENSION1_CHARS94<F> to G1
2284    ESC '*' <F>          designate DIMENSION1_CHARS94<F> to G2
2285    ESC '+' <F>          designate DIMENSION1_CHARS94<F> to G3
2286    ESC ',' <F>          designate DIMENSION1_CHARS96<F> to G0 (*)
2287    ESC '-' <F>          designate DIMENSION1_CHARS96<F> to G1
2288    ESC '.' <F>          designate DIMENSION1_CHARS96<F> to G2
2289    ESC '/' <F>          designate DIMENSION1_CHARS96<F> to G3
2290    ESC '$' '(' <F>      designate DIMENSION2_CHARS94<F> to G0 (**)
2291    ESC '$' ')' <F>      designate DIMENSION2_CHARS94<F> to G1
2292    ESC '$' '*' <F>      designate DIMENSION2_CHARS94<F> to G2
2293    ESC '$' '+' <F>      designate DIMENSION2_CHARS94<F> to G3
2294    ESC '$' ',' <F>      designate DIMENSION2_CHARS96<F> to G0 (*)
2295    ESC '$' '-' <F>      designate DIMENSION2_CHARS96<F> to G1
2296    ESC '$' '.' <F>      designate DIMENSION2_CHARS96<F> to G2
2297    ESC '$' '/' <F>      designate DIMENSION2_CHARS96<F> to G3
2298    ----------------------------------------------------------------------
2299
2300    In this list, "DIMENSION1_CHARS94<F>" means a graphic character set
2301    of dimension 1, chars 94, and final character <F>, etc...
2302
2303    Note (*): Although these designations are not allowed in ISO2022,
2304    Emacs accepts them on decoding, and produces them on encoding
2305    CHARS96 character sets in a coding system which is characterized as
2306    7-bit environment, non-locking-shift, and non-single-shift.
2307
2308    Note (**): If <F> is '@', 'A', or 'B', the intermediate character
2309    '(' must be omitted.  We refer to this as "short-form" hereafter.
2310
2311    Now you may notice that there are a lot of ways of encoding the
2312    same multilingual text in ISO2022.  Actually, there exist many
2313    coding systems such as Compound Text (used in X11's inter client
2314    communication, ISO-2022-JP (used in Japanese Internet), ISO-2022-KR
2315    (used in Korean Internet), EUC (Extended UNIX Code, used in Asian
2316    localized platforms), and all of these are variants of ISO2022.
2317
2318    In addition to the above, Emacs handles two more kinds of escape
2319    sequences: ISO6429's direction specification and Emacs' private
2320    sequence for specifying character composition.
2321
2322    ISO6429's direction specification takes the following form:
2323         o CSI ']'      -- end of the current direction
2324         o CSI '0' ']'  -- end of the current direction
2325         o CSI '1' ']'  -- start of left-to-right text
2326         o CSI '2' ']'  -- start of right-to-left text
2327    The control character CSI (0x9B: control sequence introducer) is
2328    abbreviated to the escape sequence ESC '[' in a 7-bit environment.
2329
2330    Character composition specification takes the following form:
2331         o ESC '0' -- start relative composition
2332         o ESC '1' -- end composition
2333         o ESC '2' -- start rule-base composition (*)
2334         o ESC '3' -- start relative composition with alternate chars  (**)
2335         o ESC '4' -- start rule-base composition with alternate chars  (**)
2336   Since these are not standard escape sequences of any ISO standard,
2337   the use of them with these meanings is restricted to Emacs only.
2338
2339   (*) This form is used only in Emacs 20.7 and older versions,
2340   but newer versions can safely decode it.
2341   (**) This form is used only in Emacs 21.1 and newer versions,
2342   and older versions can't decode it.
2343
2344   Here's a list of example usages of these composition escape
2345   sequences (categorized by `enum composition_method').
2346
2347   COMPOSITION_RELATIVE:
2348         ESC 0 CHAR [ CHAR ] ESC 1
2349   COMPOSITION_WITH_RULE:
2350         ESC 2 CHAR [ RULE CHAR ] ESC 1
2351   COMPOSITION_WITH_ALTCHARS:
2352         ESC 3 ALTCHAR [ ALTCHAR ] ESC 0 CHAR [ CHAR ] ESC 1
2353   COMPOSITION_WITH_RULE_ALTCHARS:
2354         ESC 4 ALTCHAR [ RULE ALTCHAR ] ESC 0 CHAR [ CHAR ] ESC 1 */
2355
2356 enum iso_code_class_type iso_code_class[256];
2357
2358 #define SAFE_CHARSET_P(coding, id)      \
2359   ((id) <= (coding)->max_charset_id     \
2360    && (coding)->safe_charsets[id] >= 0)
2361
2362
2363 #define SHIFT_OUT_OK(category)  \
2364   (CODING_ISO_INITIAL (&coding_categories[category], 1) >= 0)
2365
2366 static void
2367 setup_iso_safe_charsets (attrs)
2368      Lisp_Object attrs;
2369 {
2370   Lisp_Object charset_list, safe_charsets;
2371   Lisp_Object request;
2372   Lisp_Object reg_usage;
2373   Lisp_Object tail;
2374   int reg94, reg96;
2375   int flags = XINT (AREF (attrs, coding_attr_iso_flags));
2376   int max_charset_id;
2377
2378   charset_list = CODING_ATTR_CHARSET_LIST (attrs);
2379   if ((flags & CODING_ISO_FLAG_FULL_SUPPORT)
2380       && ! EQ (charset_list, Viso_2022_charset_list))
2381     {
2382       CODING_ATTR_CHARSET_LIST (attrs)
2383         = charset_list = Viso_2022_charset_list;
2384       ASET (attrs, coding_attr_safe_charsets, Qnil);
2385     }
2386
2387   if (STRINGP (AREF (attrs, coding_attr_safe_charsets)))
2388     return;
2389
2390   max_charset_id = 0;
2391   for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
2392     {
2393       int id = XINT (XCAR (tail));
2394       if (max_charset_id < id)
2395         max_charset_id = id;
2396     }
2397
2398   safe_charsets = Fmake_string (make_number (max_charset_id + 1),
2399                                 make_number (255));
2400   request = AREF (attrs, coding_attr_iso_request);
2401   reg_usage = AREF (attrs, coding_attr_iso_usage);
2402   reg94 = XINT (XCAR (reg_usage));
2403   reg96 = XINT (XCDR (reg_usage));
2404
2405   for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
2406     {
2407       Lisp_Object id;
2408       Lisp_Object reg;
2409       struct charset *charset;
2410
2411       id = XCAR (tail);
2412       charset = CHARSET_FROM_ID (XINT (id));
2413       reg = Fcdr (Fassq (id, request));
2414       if (! NILP (reg))
2415         XSTRING (safe_charsets)->data[XINT (id)] = XINT (reg);
2416       else if (charset->iso_chars_96)
2417         {
2418           if (reg96 < 4)
2419             XSTRING (safe_charsets)->data[XINT (id)] = reg96;
2420         }
2421       else
2422         {
2423           if (reg94 < 4)
2424             XSTRING (safe_charsets)->data[XINT (id)] = reg94;
2425         }
2426     }
2427   ASET (attrs, coding_attr_safe_charsets, safe_charsets);
2428 }
2429
2430
2431 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
2432    Check if a text is encoded in one of ISO-2022 based codig systems.
2433    If it is, return 1, else return 0.  */
2434
2435 static int
2436 detect_coding_iso_2022 (coding, detect_info)
2437      struct coding_system *coding;
2438      struct coding_detection_info *detect_info;
2439 {
2440   unsigned char *src = coding->source, *src_base = src;
2441   unsigned char *src_end = coding->source + coding->src_bytes;
2442   int multibytep = coding->src_multibyte;
2443   int single_shifting = 0;
2444   int id;
2445   int c, c1;
2446   int consumed_chars = 0;
2447   int i;
2448   int rejected = 0;
2449   int found = 0;
2450
2451   detect_info->checked |= CATEGORY_MASK_ISO;
2452
2453   for (i = coding_category_iso_7; i <= coding_category_iso_8_else; i++)
2454     {
2455       struct coding_system *this = &(coding_categories[i]);
2456       Lisp_Object attrs, val;
2457
2458       attrs = CODING_ID_ATTRS (this->id);
2459       if (CODING_ISO_FLAGS (this) & CODING_ISO_FLAG_FULL_SUPPORT
2460           && ! EQ (CODING_ATTR_SAFE_CHARSETS (attrs), Viso_2022_charset_list))
2461         setup_iso_safe_charsets (attrs);
2462       val = CODING_ATTR_SAFE_CHARSETS (attrs);
2463       this->max_charset_id = XSTRING (val)->size - 1;
2464       this->safe_charsets = (char *) XSTRING (val)->data;
2465     }
2466
2467   /* A coding system of this category is always ASCII compatible.  */
2468   src += coding->head_ascii;
2469
2470   while (rejected != CATEGORY_MASK_ISO)
2471     {
2472       ONE_MORE_BYTE (c);
2473       switch (c)
2474         {
2475         case ISO_CODE_ESC:
2476           if (inhibit_iso_escape_detection)
2477             break;
2478           single_shifting = 0;
2479           ONE_MORE_BYTE (c);
2480           if (c >= '(' && c <= '/')
2481             {
2482               /* Designation sequence for a charset of dimension 1.  */
2483               ONE_MORE_BYTE (c1);
2484               if (c1 < ' ' || c1 >= 0x80
2485                   || (id = iso_charset_table[0][c >= ','][c1]) < 0)
2486                 /* Invalid designation sequence.  Just ignore.  */
2487                 break;
2488             }
2489           else if (c == '$')
2490             {
2491               /* Designation sequence for a charset of dimension 2.  */
2492               ONE_MORE_BYTE (c);
2493               if (c >= '@' && c <= 'B')
2494                 /* Designation for JISX0208.1978, GB2312, or JISX0208.  */
2495                 id = iso_charset_table[1][0][c];
2496               else if (c >= '(' && c <= '/')
2497                 {
2498                   ONE_MORE_BYTE (c1);
2499                   if (c1 < ' ' || c1 >= 0x80
2500                       || (id = iso_charset_table[1][c >= ','][c1]) < 0)
2501                     /* Invalid designation sequence.  Just ignore.  */
2502                     break;
2503                 }
2504               else
2505                 /* Invalid designation sequence.  Just ignore it.  */
2506                 break;
2507             }
2508           else if (c == 'N' || c == 'O')
2509             {
2510               /* ESC <Fe> for SS2 or SS3.  */
2511               single_shifting = 1;
2512               rejected |= CATEGORY_MASK_ISO_7BIT | CATEGORY_MASK_ISO_8BIT;
2513               break;
2514             }
2515           else if (c >= '0' && c <= '4')
2516             {
2517               /* ESC <Fp> for start/end composition.  */
2518               found |= CATEGORY_MASK_ISO;
2519               break;
2520             }
2521           else
2522             {
2523               /* Invalid escape sequence.  Just ignore it.  */
2524               break;
2525             }
2526
2527           /* We found a valid designation sequence for CHARSET.  */
2528           rejected |= CATEGORY_MASK_ISO_8BIT;
2529           if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_7],
2530                               id))
2531             found |= CATEGORY_MASK_ISO_7;
2532           else
2533             rejected |= CATEGORY_MASK_ISO_7;
2534           if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_7_tight],
2535                               id))
2536             found |= CATEGORY_MASK_ISO_7_TIGHT;
2537           else
2538             rejected |= CATEGORY_MASK_ISO_7_TIGHT;
2539           if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_7_else],
2540                               id))
2541             found |= CATEGORY_MASK_ISO_7_ELSE;
2542           else
2543             rejected |= CATEGORY_MASK_ISO_7_ELSE;
2544           if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_8_else],
2545                               id))
2546             found |= CATEGORY_MASK_ISO_8_ELSE;
2547           else
2548             rejected |= CATEGORY_MASK_ISO_8_ELSE;
2549           break;
2550
2551         case ISO_CODE_SO:
2552         case ISO_CODE_SI:
2553           /* Locking shift out/in.  */
2554           if (inhibit_iso_escape_detection)
2555             break;
2556           single_shifting = 0;
2557           rejected |= CATEGORY_MASK_ISO_7BIT | CATEGORY_MASK_ISO_8BIT;
2558           found |= CATEGORY_MASK_ISO_ELSE;
2559           break;
2560
2561         case ISO_CODE_CSI:
2562           /* Control sequence introducer.  */
2563           single_shifting = 0;
2564           rejected |= CATEGORY_MASK_ISO_7BIT | CATEGORY_MASK_ISO_7_ELSE;
2565           found |= CATEGORY_MASK_ISO_8_ELSE;
2566           goto check_extra_latin;
2567
2568
2569         case ISO_CODE_SS2:
2570         case ISO_CODE_SS3:
2571           /* Single shift.   */
2572           if (inhibit_iso_escape_detection)
2573             break;
2574           single_shifting = 1;
2575           rejected |= CATEGORY_MASK_ISO_7BIT;
2576           if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_1])
2577               & CODING_ISO_FLAG_SINGLE_SHIFT)
2578             found |= CATEGORY_MASK_ISO_8_1;
2579           if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_2])
2580               & CODING_ISO_FLAG_SINGLE_SHIFT)
2581             found |= CATEGORY_MASK_ISO_8_2;
2582           goto check_extra_latin;
2583
2584         default:
2585           if (c < 0x80)
2586             {
2587               single_shifting = 0;
2588               break;
2589             }
2590           if (c >= 0xA0)
2591             {
2592               rejected |= CATEGORY_MASK_ISO_7BIT | CATEGORY_MASK_ISO_7_ELSE;
2593               found |= CATEGORY_MASK_ISO_8_1;
2594               /* Check the length of succeeding codes of the range
2595                  0xA0..0FF.  If the byte length is even, we include
2596                  CATEGORY_MASK_ISO_8_2 in `found'.  We can check this
2597                  only when we are not single shifting.  */
2598               if (! single_shifting
2599                   && ! (rejected & CATEGORY_MASK_ISO_8_2))
2600                 {
2601                   int i = 1;
2602                   while (src < src_end)
2603                     {
2604                       ONE_MORE_BYTE (c);
2605                       if (c < 0xA0)
2606                         break;
2607                       i++;
2608                     }
2609
2610                   if (i & 1 && src < src_end)
2611                     rejected |= CATEGORY_MASK_ISO_8_2;
2612                   else
2613                     found |= CATEGORY_MASK_ISO_8_2;
2614                 }
2615               break;
2616             }
2617         check_extra_latin:
2618           single_shifting = 0;
2619           if (! VECTORP (Vlatin_extra_code_table)
2620               || NILP (XVECTOR (Vlatin_extra_code_table)->contents[c]))
2621             {
2622               rejected = CATEGORY_MASK_ISO;
2623               break;
2624             }
2625           if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_1])
2626               & CODING_ISO_FLAG_LATIN_EXTRA)
2627             found |= CATEGORY_MASK_ISO_8_1;
2628           else
2629             rejected |= CATEGORY_MASK_ISO_8_1;
2630           if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_2])
2631               & CODING_ISO_FLAG_LATIN_EXTRA)
2632             found |= CATEGORY_MASK_ISO_8_2;
2633           else
2634             rejected |= CATEGORY_MASK_ISO_8_2;
2635         }
2636     }
2637   detect_info->rejected |= CATEGORY_MASK_ISO;
2638   return 0;
2639
2640  no_more_source:
2641   detect_info->rejected |= rejected;
2642   detect_info->found |= (found & ~rejected);
2643   return 1;
2644 }
2645
2646
2647 /* Set designation state into CODING.  */
2648 #define DECODE_DESIGNATION(reg, dim, chars_96, final)                   \
2649   do {                                                                  \
2650     int id, prev;                                                       \
2651                                                                         \
2652     if (final < '0' || final >= 128                                     \
2653         || ((id = ISO_CHARSET_TABLE (dim, chars_96, final)) < 0)        \
2654         || !SAFE_CHARSET_P (coding, id))                                \
2655       {                                                                 \
2656         CODING_ISO_DESIGNATION (coding, reg) = -2;                      \
2657         goto invalid_code;                                              \
2658       }                                                                 \
2659     prev = CODING_ISO_DESIGNATION (coding, reg);                        \
2660     if (id == charset_jisx0201_roman)                                   \
2661       {                                                                 \
2662         if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_USE_ROMAN)      \
2663           id = charset_ascii;                                           \
2664       }                                                                 \
2665     else if (id == charset_jisx0208_1978)                               \
2666       {                                                                 \
2667         if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_USE_OLDJIS)     \
2668           id = charset_jisx0208;                                        \
2669       }                                                                 \
2670     CODING_ISO_DESIGNATION (coding, reg) = id;                          \
2671     /* If there was an invalid designation to REG previously, and this  \
2672        designation is ASCII to REG, we should keep this designation     \
2673        sequence.  */                                                    \
2674     if (prev == -2 && id == charset_ascii)                              \
2675       goto invalid_code;                                                \
2676   } while (0)
2677
2678
2679 #define MAYBE_FINISH_COMPOSITION()                              \
2680   do {                                                          \
2681     int i;                                                      \
2682     if (composition_state == COMPOSING_NO)                      \
2683       break;                                                    \
2684     /* It is assured that we have enough room for producing     \
2685        characters stored in the table `components'.  */         \
2686     if (charbuf + component_idx > charbuf_end)                  \
2687       goto no_more_source;                                      \
2688     composition_state = COMPOSING_NO;                           \
2689     if (method == COMPOSITION_RELATIVE                          \
2690         || method == COMPOSITION_WITH_ALTCHARS)                 \
2691       {                                                         \
2692         for (i = 0; i < component_idx; i++)                     \
2693           *charbuf++ = components[i];                           \
2694         char_offset += component_idx;                           \
2695       }                                                         \
2696     else                                                        \
2697       {                                                         \
2698         for (i = 0; i < component_idx; i += 2)                  \
2699           *charbuf++ = components[i];                           \
2700         char_offset += (component_idx / 2) + 1;                 \
2701       }                                                         \
2702   } while (0)
2703
2704
2705 /* Handle composition start sequence ESC 0, ESC 2, ESC 3, or ESC 4.
2706    ESC 0 : relative composition : ESC 0 CHAR ... ESC 1
2707    ESC 2 : rulebase composition : ESC 2 CHAR RULE CHAR RULE ... CHAR ESC 1
2708    ESC 3 : altchar composition :  ESC 3 CHAR ... ESC 0 CHAR ... ESC 1
2709    ESC 4 : alt&rule composition : ESC 4 CHAR RULE ... CHAR ESC 0 CHAR ... ESC 1
2710   */
2711
2712 #define DECODE_COMPOSITION_START(c1)                                    \
2713   do {                                                                  \
2714     if (c1 == '0'                                                       \
2715         && composition_state == COMPOSING_COMPONENT_RULE)               \
2716       {                                                                 \
2717         component_len = component_idx;                                  \
2718         composition_state = COMPOSING_CHAR;                             \
2719       }                                                                 \
2720     else                                                                \
2721       {                                                                 \
2722         unsigned char *p;                                               \
2723                                                                         \
2724         MAYBE_FINISH_COMPOSITION ();                                    \
2725         if (charbuf + MAX_COMPOSITION_COMPONENTS > charbuf_end)         \
2726           goto no_more_source;                                          \
2727         for (p = src; p < src_end - 1; p++)                             \
2728           if (*p == ISO_CODE_ESC && p[1] == '1')                        \
2729             break;                                                      \
2730         if (p == src_end - 1)                                           \
2731           {                                                             \
2732             if (coding->mode & CODING_MODE_LAST_BLOCK)                  \
2733               goto invalid_code;                                        \
2734             goto no_more_source;                                        \
2735           }                                                             \
2736                                                                         \
2737         /* This is surely the start of a composition.  */               \
2738         method = (c1 == '0' ? COMPOSITION_RELATIVE                      \
2739                   : c1 == '2' ? COMPOSITION_WITH_RULE                   \
2740                   : c1 == '3' ? COMPOSITION_WITH_ALTCHARS               \
2741                   : COMPOSITION_WITH_RULE_ALTCHARS);                    \
2742         composition_state = (c1 <= '2' ? COMPOSING_CHAR                 \
2743                              : COMPOSING_COMPONENT_CHAR);               \
2744         component_idx = component_len = 0;                              \
2745       }                                                                 \
2746   } while (0)
2747
2748
2749 /* Handle compositoin end sequence ESC 1.  */
2750
2751 #define DECODE_COMPOSITION_END()                                        \
2752   do {                                                                  \
2753     int nchars = (component_len > 0 ? component_idx - component_len     \
2754                   : method == COMPOSITION_RELATIVE ? component_idx      \
2755                   : (component_idx + 1) / 2);                           \
2756     int i;                                                              \
2757     int *saved_charbuf = charbuf;                                       \
2758     int from = coding->produced_char + char_offset;                     \
2759     int to = from + nchars;                                             \
2760                                                                         \
2761     ADD_COMPOSITION_DATA (charbuf, from, to, method);                   \
2762     if (method != COMPOSITION_RELATIVE)                                 \
2763       {                                                                 \
2764         if (component_len == 0)                                         \
2765           for (i = 0; i < component_idx; i++)                           \
2766             *charbuf++ = components[i];                                 \
2767         else                                                            \
2768           for (i = 0; i < component_len; i++)                           \
2769             *charbuf++ = components[i];                                 \
2770         *saved_charbuf = saved_charbuf - charbuf;                       \
2771       }                                                                 \
2772     if (method == COMPOSITION_WITH_RULE)                                \
2773       for (i = 0; i < component_idx; i += 2, char_offset++)             \
2774         *charbuf++ = components[i];                                     \
2775     else                                                                \
2776       for (i = component_len; i < component_idx; i++, char_offset++)    \
2777         *charbuf++ = components[i];                                     \
2778     coding->annotated = 1;                                              \
2779     composition_state = COMPOSING_NO;                                   \
2780   } while (0)
2781
2782
2783 /* Decode a composition rule from the byte C1 (and maybe one more byte
2784    from SRC) and store one encoded composition rule in
2785    coding->cmp_data.  */
2786
2787 #define DECODE_COMPOSITION_RULE(c1)                                     \
2788   do {                                                                  \
2789     (c1) -= 32;                                                         \
2790     if (c1 < 81)                /* old format (before ver.21) */        \
2791       {                                                                 \
2792         int gref = (c1) / 9;                                            \
2793         int nref = (c1) % 9;                                            \
2794         if (gref == 4) gref = 10;                                       \
2795         if (nref == 4) nref = 10;                                       \
2796         c1 = COMPOSITION_ENCODE_RULE (gref, nref);                      \
2797       }                                                                 \
2798     else if (c1 < 93)           /* new format (after ver.21) */         \
2799       {                                                                 \
2800         ONE_MORE_BYTE (c2);                                             \
2801         c1 = COMPOSITION_ENCODE_RULE (c1 - 81, c2 - 32);                \
2802       }                                                                 \
2803     else                                                                \
2804       c1 = 0;                                                           \
2805   } while (0)
2806
2807
2808 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".  */
2809
2810 static void
2811 decode_coding_iso_2022 (coding)
2812      struct coding_system *coding;
2813 {
2814   unsigned char *src = coding->source + coding->consumed;
2815   unsigned char *src_end = coding->source + coding->src_bytes;
2816   unsigned char *src_base;
2817   int *charbuf = coding->charbuf;
2818   int *charbuf_end
2819     = charbuf + coding->charbuf_size - 4 - MAX_ANNOTATION_LENGTH;
2820   int consumed_chars = 0, consumed_chars_base;
2821   int multibytep = coding->src_multibyte;
2822   /* Charsets invoked to graphic plane 0 and 1 respectively.  */
2823   int charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
2824   int charset_id_1 = CODING_ISO_INVOKED_CHARSET (coding, 1);
2825   struct charset *charset;
2826   int c;
2827   /* For handling composition sequence.  */
2828 #define COMPOSING_NO                    0
2829 #define COMPOSING_CHAR                  1
2830 #define COMPOSING_RULE                  2
2831 #define COMPOSING_COMPONENT_CHAR        3
2832 #define COMPOSING_COMPONENT_RULE        4
2833
2834   int composition_state = COMPOSING_NO;
2835   enum composition_method method;
2836   int components[MAX_COMPOSITION_COMPONENTS * 2 + 1];
2837   int component_idx;
2838   int component_len;
2839   Lisp_Object attrs, eol_type, charset_list;
2840   int char_offset = coding->produced_char;
2841   int last_offset = char_offset;
2842   int last_id = charset_ascii;
2843
2844   CODING_GET_INFO (coding, attrs, eol_type, charset_list);
2845   setup_iso_safe_charsets (attrs);
2846
2847   while (1)
2848     {
2849       int c1, c2;
2850
2851       src_base = src;
2852       consumed_chars_base = consumed_chars;
2853
2854       if (charbuf >= charbuf_end)
2855         break;
2856
2857       ONE_MORE_BYTE (c1);
2858
2859       /* We produce at most one character.  */
2860       switch (iso_code_class [c1])
2861         {
2862         case ISO_0x20_or_0x7F:
2863           if (composition_state != COMPOSING_NO)
2864             {
2865               if (composition_state == COMPOSING_RULE
2866                   || composition_state == COMPOSING_COMPONENT_RULE)
2867                 {
2868                   DECODE_COMPOSITION_RULE (c1);
2869                   components[component_idx++] = c1;
2870                   composition_state--;
2871                   continue;
2872                 }
2873             }
2874           if (charset_id_0 < 0
2875               || ! CHARSET_ISO_CHARS_96 (CHARSET_FROM_ID (charset_id_0)))
2876             /* This is SPACE or DEL.  */
2877             charset = CHARSET_FROM_ID (charset_ascii);
2878           else
2879             charset = CHARSET_FROM_ID (charset_id_0);
2880           break;
2881
2882         case ISO_graphic_plane_0:
2883           if (composition_state != COMPOSING_NO)
2884             {
2885               if (composition_state == COMPOSING_RULE
2886                   || composition_state == COMPOSING_COMPONENT_RULE)
2887                 {
2888                   DECODE_COMPOSITION_RULE (c1);
2889                   components[component_idx++] = c1;
2890                   composition_state--;
2891                   continue;
2892                 }
2893             }
2894           charset = CHARSET_FROM_ID (charset_id_0);
2895           break;
2896
2897         case ISO_0xA0_or_0xFF:
2898           if (charset_id_1 < 0
2899               || ! CHARSET_ISO_CHARS_96 (CHARSET_FROM_ID (charset_id_1))
2900               || CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS)
2901             goto invalid_code;
2902           /* This is a graphic character, we fall down ... */
2903
2904         case ISO_graphic_plane_1:
2905           if (charset_id_1 < 0)
2906             goto invalid_code;
2907           charset = CHARSET_FROM_ID (charset_id_1);
2908           break;
2909
2910         case ISO_carriage_return:
2911           if (c1 == '\r')
2912             {
2913               if (EQ (eol_type, Qdos))
2914                 {
2915                   if (src == src_end)
2916                     {
2917                       coding->result = CODING_RESULT_INSUFFICIENT_SRC;
2918                       goto no_more_source;
2919                     }
2920                   if (*src == '\n')
2921                     ONE_MORE_BYTE (c1);
2922                 }
2923               else if (EQ (eol_type, Qmac))
2924                 c1 = '\n';
2925             }
2926           /* fall through */
2927
2928         case ISO_control_0:
2929           MAYBE_FINISH_COMPOSITION ();
2930           charset = CHARSET_FROM_ID (charset_ascii);
2931           break;
2932
2933         case ISO_control_1:
2934           MAYBE_FINISH_COMPOSITION ();
2935           goto invalid_code;
2936
2937         case ISO_shift_out:
2938           if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LOCKING_SHIFT)
2939               || CODING_ISO_DESIGNATION (coding, 1) < 0)
2940             goto invalid_code;
2941           CODING_ISO_INVOCATION (coding, 0) = 1;
2942           charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
2943           continue;
2944
2945         case ISO_shift_in:
2946           if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LOCKING_SHIFT))
2947             goto invalid_code;
2948           CODING_ISO_INVOCATION (coding, 0) = 0;
2949           charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
2950           continue;
2951
2952         case ISO_single_shift_2_7:
2953         case ISO_single_shift_2:
2954           if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT))
2955             goto invalid_code;
2956           /* SS2 is handled as an escape sequence of ESC 'N' */
2957           c1 = 'N';
2958           goto label_escape_sequence;
2959
2960         case ISO_single_shift_3:
2961           if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT))
2962             goto invalid_code;
2963           /* SS2 is handled as an escape sequence of ESC 'O' */
2964           c1 = 'O';
2965           goto label_escape_sequence;
2966
2967         case ISO_control_sequence_introducer:
2968           /* CSI is handled as an escape sequence of ESC '[' ...  */
2969           c1 = '[';
2970           goto label_escape_sequence;
2971
2972         case ISO_escape:
2973           ONE_MORE_BYTE (c1);
2974         label_escape_sequence:
2975           /* Escape sequences handled here are invocation,
2976              designation, direction specification, and character
2977              composition specification.  */
2978           switch (c1)
2979             {
2980             case '&':           /* revision of following character set */
2981               ONE_MORE_BYTE (c1);
2982               if (!(c1 >= '@' && c1 <= '~'))
2983                 goto invalid_code;
2984               ONE_MORE_BYTE (c1);
2985               if (c1 != ISO_CODE_ESC)
2986                 goto invalid_code;
2987               ONE_MORE_BYTE (c1);
2988               goto label_escape_sequence;
2989
2990             case '$':           /* designation of 2-byte character set */
2991               if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DESIGNATION))
2992                 goto invalid_code;
2993               ONE_MORE_BYTE (c1);
2994               if (c1 >= '@' && c1 <= 'B')
2995                 {       /* designation of JISX0208.1978, GB2312.1980,
2996                            or JISX0208.1980 */
2997                   DECODE_DESIGNATION (0, 2, 0, c1);
2998                 }
2999               else if (c1 >= 0x28 && c1 <= 0x2B)
3000                 {       /* designation of DIMENSION2_CHARS94 character set */
3001                   ONE_MORE_BYTE (c2);
3002                   DECODE_DESIGNATION (c1 - 0x28, 2, 0, c2);
3003                 }
3004               else if (c1 >= 0x2C && c1 <= 0x2F)
3005                 {       /* designation of DIMENSION2_CHARS96 character set */
3006                   ONE_MORE_BYTE (c2);
3007                   DECODE_DESIGNATION (c1 - 0x2C, 2, 1, c2);
3008                 }
3009               else
3010                 goto invalid_code;
3011               /* We must update these variables now.  */
3012               charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
3013               charset_id_1 = CODING_ISO_INVOKED_CHARSET (coding, 1);
3014               continue;
3015
3016             case 'n':           /* invocation of locking-shift-2 */
3017               if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LOCKING_SHIFT)
3018                   || CODING_ISO_DESIGNATION (coding, 2) < 0)
3019                 goto invalid_code;
3020               CODING_ISO_INVOCATION (coding, 0) = 2;
3021               charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
3022               continue;
3023
3024             case 'o':           /* invocation of locking-shift-3 */
3025               if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LOCKING_SHIFT)
3026                   || CODING_ISO_DESIGNATION (coding, 3) < 0)
3027                 goto invalid_code;
3028               CODING_ISO_INVOCATION (coding, 0) = 3;
3029               charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
3030               continue;
3031
3032             case 'N':           /* invocation of single-shift-2 */
3033               if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT)
3034                   || CODING_ISO_DESIGNATION (coding, 2) < 0)
3035                 goto invalid_code;
3036               charset = CHARSET_FROM_ID (CODING_ISO_DESIGNATION (coding, 2));
3037               ONE_MORE_BYTE (c1);
3038               if (c1 < 0x20 || (c1 >= 0x80 && c1 < 0xA0))
3039                 goto invalid_code;
3040               break;
3041
3042             case 'O':           /* invocation of single-shift-3 */
3043               if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT)
3044                   || CODING_ISO_DESIGNATION (coding, 3) < 0)
3045                 goto invalid_code;
3046               charset = CHARSET_FROM_ID (CODING_ISO_DESIGNATION (coding, 3));
3047               ONE_MORE_BYTE (c1);
3048               if (c1 < 0x20 || (c1 >= 0x80 && c1 < 0xA0))
3049                 goto invalid_code;
3050               break;
3051
3052             case '0': case '2': case '3': case '4': /* start composition */
3053               if (! (coding->common_flags & CODING_ANNOTATE_COMPOSITION_MASK))
3054                 goto invalid_code;
3055               DECODE_COMPOSITION_START (c1);
3056               continue;
3057
3058             case '1':           /* end composition */
3059               if (composition_state == COMPOSING_NO)
3060                 goto invalid_code;
3061               DECODE_COMPOSITION_END ();
3062               continue;
3063
3064             case '[':           /* specification of direction */
3065               if (! CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DIRECTION)
3066                 goto invalid_code;
3067               /* For the moment, nested direction is not supported.
3068                  So, `coding->mode & CODING_MODE_DIRECTION' zero means
3069                  left-to-right, and nozero means right-to-left.  */
3070               ONE_MORE_BYTE (c1);
3071               switch (c1)
3072                 {
3073                 case ']':       /* end of the current direction */
3074                   coding->mode &= ~CODING_MODE_DIRECTION;
3075
3076                 case '0':       /* end of the current direction */
3077                 case '1':       /* start of left-to-right direction */
3078                   ONE_MORE_BYTE (c1);
3079                   if (c1 == ']')
3080                     coding->mode &= ~CODING_MODE_DIRECTION;
3081                   else
3082                     goto invalid_code;
3083                   break;
3084
3085                 case '2':       /* start of right-to-left direction */
3086                   ONE_MORE_BYTE (c1);
3087                   if (c1 == ']')
3088                     coding->mode |= CODING_MODE_DIRECTION;
3089                   else
3090                     goto invalid_code;
3091                   break;
3092
3093                 default:
3094                   goto invalid_code;
3095                 }
3096               continue;
3097
3098             default:
3099               if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DESIGNATION))
3100                 goto invalid_code;
3101               if (c1 >= 0x28 && c1 <= 0x2B)
3102                 {       /* designation of DIMENSION1_CHARS94 character set */
3103                   ONE_MORE_BYTE (c2);
3104                   DECODE_DESIGNATION (c1 - 0x28, 1, 0, c2);
3105                 }
3106               else if (c1 >= 0x2C && c1 <= 0x2F)
3107                 {       /* designation of DIMENSION1_CHARS96 character set */
3108                   ONE_MORE_BYTE (c2);
3109                   DECODE_DESIGNATION (c1 - 0x2C, 1, 1, c2);
3110                 }
3111               else
3112                 goto invalid_code;
3113               /* We must update these variables now.  */
3114               charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
3115               charset_id_1 = CODING_ISO_INVOKED_CHARSET (coding, 1);
3116               continue;
3117             }
3118         }
3119
3120       if (charset->id != charset_ascii
3121           && last_id != charset->id)
3122         {
3123           if (last_id != charset_ascii)
3124             ADD_CHARSET_DATA (charbuf, last_offset, char_offset, last_id);
3125           last_id = charset->id;
3126           last_offset = char_offset;
3127         }
3128
3129       /* Now we know CHARSET and 1st position code C1 of a character.
3130          Produce a decoded character while getting 2nd position code
3131          C2 if necessary.  */
3132       c1 &= 0x7F;
3133       if (CHARSET_DIMENSION (charset) > 1)
3134         {
3135           ONE_MORE_BYTE (c2);
3136           if (c2 < 0x20 || (c2 >= 0x80 && c2 < 0xA0))
3137             /* C2 is not in a valid range.  */
3138             goto invalid_code;
3139           c1 = (c1 << 8) | (c2 & 0x7F);
3140           if (CHARSET_DIMENSION (charset) > 2)
3141             {
3142               ONE_MORE_BYTE (c2);
3143               if (c2 < 0x20 || (c2 >= 0x80 && c2 < 0xA0))
3144                 /* C2 is not in a valid range.  */
3145                 goto invalid_code;
3146               c1 = (c1 << 8) | (c2 & 0x7F);
3147             }
3148         }
3149
3150       CODING_DECODE_CHAR (coding, src, src_base, src_end, charset, c1, c);
3151       if (c < 0)
3152         {
3153           MAYBE_FINISH_COMPOSITION ();
3154           for (; src_base < src; src_base++, char_offset++)
3155             {
3156               if (ASCII_BYTE_P (*src_base))
3157                 *charbuf++ = *src_base;
3158               else
3159                 *charbuf++ = BYTE8_TO_CHAR (*src_base);
3160               char_offset++;
3161             }
3162         }
3163       else if (composition_state == COMPOSING_NO)
3164         {
3165           *charbuf++ = c;
3166           char_offset++;
3167         }
3168       else
3169         {
3170           components[component_idx++] = c;
3171           if (method == COMPOSITION_WITH_RULE
3172               || (method == COMPOSITION_WITH_RULE_ALTCHARS
3173                   && composition_state == COMPOSING_COMPONENT_CHAR))
3174             composition_state++;
3175         }
3176       continue;
3177
3178     invalid_code:
3179       MAYBE_FINISH_COMPOSITION ();
3180       src = src_base;
3181       consumed_chars = consumed_chars_base;
3182       ONE_MORE_BYTE (c);
3183       *charbuf++ = ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
3184       char_offset++;
3185       coding->errors++;
3186     }
3187
3188  no_more_source:
3189   if (last_id != charset_ascii)
3190     ADD_CHARSET_DATA (charbuf, last_offset, char_offset, last_id);
3191   coding->consumed_char += consumed_chars_base;
3192   coding->consumed = src_base - coding->source;
3193   coding->charbuf_used = charbuf - coding->charbuf;
3194 }
3195
3196
3197 /* ISO2022 encoding stuff.  */
3198
3199 /*
3200    It is not enough to say just "ISO2022" on encoding, we have to
3201    specify more details.  In Emacs, each coding system of ISO2022
3202    variant has the following specifications:
3203         1. Initial designation to G0 thru G3.
3204         2. Allows short-form designation?
3205         3. ASCII should be designated to G0 before control characters?
3206         4. ASCII should be designated to G0 at end of line?
3207         5. 7-bit environment or 8-bit environment?
3208         6. Use locking-shift?
3209         7. Use Single-shift?
3210    And the following two are only for Japanese:
3211         8. Use ASCII in place of JIS0201-1976-Roman?
3212         9. Use JISX0208-1983 in place of JISX0208-1978?
3213    These specifications are encoded in CODING_ISO_FLAGS (coding) as flag bits
3214    defined by macros CODING_ISO_FLAG_XXX.  See `coding.h' for more
3215    details.
3216 */
3217
3218 /* Produce codes (escape sequence) for designating CHARSET to graphic
3219    register REG at DST, and increment DST.  If <final-char> of CHARSET is
3220    '@', 'A', or 'B' and the coding system CODING allows, produce
3221    designation sequence of short-form.  */
3222
3223 #define ENCODE_DESIGNATION(charset, reg, coding)                        \
3224   do {                                                                  \
3225     unsigned char final_char = CHARSET_ISO_FINAL (charset);             \
3226     char *intermediate_char_94 = "()*+";                                \
3227     char *intermediate_char_96 = ",-./";                                \
3228     int revision = -1;                                                  \
3229     int c;                                                              \
3230                                                                         \
3231     if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_REVISION)           \
3232       revision = CHARSET_ISO_REVISION (charset);                        \
3233                                                                         \
3234     if (revision >= 0)                                                  \
3235       {                                                                 \
3236         EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, '&');                       \
3237         EMIT_ONE_BYTE ('@' + revision);                                 \
3238       }                                                                 \
3239     EMIT_ONE_ASCII_BYTE (ISO_CODE_ESC);                                 \
3240     if (CHARSET_DIMENSION (charset) == 1)                               \
3241       {                                                                 \
3242         if (! CHARSET_ISO_CHARS_96 (charset))                           \
3243           c = intermediate_char_94[reg];                                \
3244         else                                                            \
3245           c = intermediate_char_96[reg];                                \
3246         EMIT_ONE_ASCII_BYTE (c);                                        \
3247       }                                                                 \
3248     else                                                                \
3249       {                                                                 \
3250         EMIT_ONE_ASCII_BYTE ('$');                                      \
3251         if (! CHARSET_ISO_CHARS_96 (charset))                           \
3252           {                                                             \
3253             if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LONG_FORM   \
3254                 || reg != 0                                             \
3255                 || final_char < '@' || final_char > 'B')                \
3256               EMIT_ONE_ASCII_BYTE (intermediate_char_94[reg]);          \
3257           }                                                             \
3258         else                                                            \
3259           EMIT_ONE_ASCII_BYTE (intermediate_char_96[reg]);              \
3260       }                                                                 \
3261     EMIT_ONE_ASCII_BYTE (final_char);                                   \
3262                                                                         \
3263     CODING_ISO_DESIGNATION (coding, reg) = CHARSET_ID (charset);        \
3264   } while (0)
3265
3266
3267 /* The following two macros produce codes (control character or escape
3268    sequence) for ISO2022 single-shift functions (single-shift-2 and
3269    single-shift-3).  */
3270
3271 #define ENCODE_SINGLE_SHIFT_2                                           \
3272   do {                                                                  \
3273     if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS)         \
3274       EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'N');                         \
3275     else                                                                \
3276       EMIT_ONE_BYTE (ISO_CODE_SS2);                                     \
3277     CODING_ISO_SINGLE_SHIFTING (coding) = 1;                            \
3278   } while (0)
3279
3280
3281 #define ENCODE_SINGLE_SHIFT_3                                           \
3282   do {                                                                  \
3283     if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS)         \
3284       EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'O');                         \
3285     else                                                                \
3286       EMIT_ONE_BYTE (ISO_CODE_SS3);                                     \
3287     CODING_ISO_SINGLE_SHIFTING (coding) = 1;                            \
3288   } while (0)
3289
3290
3291 /* The following four macros produce codes (control character or
3292    escape sequence) for ISO2022 locking-shift functions (shift-in,
3293    shift-out, locking-shift-2, and locking-shift-3).  */
3294
3295 #define ENCODE_SHIFT_IN                                 \
3296   do {                                                  \
3297     EMIT_ONE_ASCII_BYTE (ISO_CODE_SI);                  \
3298     CODING_ISO_INVOCATION (coding, 0) = 0;              \
3299   } while (0)
3300
3301
3302 #define ENCODE_SHIFT_OUT                                \
3303   do {                                                  \
3304     EMIT_ONE_ASCII_BYTE (ISO_CODE_SO);                  \
3305     CODING_ISO_INVOCATION (coding, 0) = 1;              \
3306   } while (0)
3307
3308
3309 #define ENCODE_LOCKING_SHIFT_2                          \
3310   do {                                                  \
3311     EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'n');           \
3312     CODING_ISO_INVOCATION (coding, 0) = 2;              \
3313   } while (0)
3314
3315
3316 #define ENCODE_LOCKING_SHIFT_3                          \
3317   do {                                                  \
3318     EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'n');           \
3319     CODING_ISO_INVOCATION (coding, 0) = 3;              \
3320   } while (0)
3321
3322
3323 /* Produce codes for a DIMENSION1 character whose character set is
3324    CHARSET and whose position-code is C1.  Designation and invocation
3325    sequences are also produced in advance if necessary.  */
3326
3327 #define ENCODE_ISO_CHARACTER_DIMENSION1(charset, c1)                    \
3328   do {                                                                  \
3329     int id = CHARSET_ID (charset);                                      \
3330                                                                         \
3331     if ((CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_USE_ROMAN)         \
3332         && id == charset_ascii)                                         \
3333       {                                                                 \
3334         id = charset_jisx0201_roman;                                    \
3335         charset = CHARSET_FROM_ID (id);                                 \
3336       }                                                                 \
3337                                                                         \
3338     if (CODING_ISO_SINGLE_SHIFTING (coding))                            \
3339       {                                                                 \
3340         if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS)     \
3341           EMIT_ONE_ASCII_BYTE (c1 & 0x7F);                              \
3342         else                                                            \
3343           EMIT_ONE_BYTE (c1 | 0x80);                                    \
3344         CODING_ISO_SINGLE_SHIFTING (coding) = 0;                        \
3345         break;                                                          \
3346       }                                                                 \
3347     else if (id == CODING_ISO_INVOKED_CHARSET (coding, 0))              \
3348       {                                                                 \
3349         EMIT_ONE_ASCII_BYTE (c1 & 0x7F);                                \
3350         break;                                                          \
3351       }                                                                 \
3352     else if (id == CODING_ISO_INVOKED_CHARSET (coding, 1))              \
3353       {                                                                 \
3354         EMIT_ONE_BYTE (c1 | 0x80);                                      \
3355         break;                                                          \
3356       }                                                                 \
3357     else                                                                \
3358       /* Since CHARSET is not yet invoked to any graphic planes, we     \
3359          must invoke it, or, at first, designate it to some graphic     \
3360          register.  Then repeat the loop to actually produce the        \
3361          character.  */                                                 \
3362       dst = encode_invocation_designation (charset, coding, dst,        \
3363                                            &produced_chars);            \
3364   } while (1)
3365
3366
3367 /* Produce codes for a DIMENSION2 character whose character set is
3368    CHARSET and whose position-codes are C1 and C2.  Designation and
3369    invocation codes are also produced in advance if necessary.  */
3370
3371 #define ENCODE_ISO_CHARACTER_DIMENSION2(charset, c1, c2)                \
3372   do {                                                                  \
3373     int id = CHARSET_ID (charset);                                      \
3374                                                                         \
3375     if ((CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_USE_OLDJIS)        \
3376         && id == charset_jisx0208)                                      \
3377       {                                                                 \
3378         id = charset_jisx0208_1978;                                     \
3379         charset = CHARSET_FROM_ID (id);                                 \
3380       }                                                                 \
3381                                                                         \
3382     if (CODING_ISO_SINGLE_SHIFTING (coding))                            \
3383       {                                                                 \
3384         if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS)     \
3385           EMIT_TWO_ASCII_BYTES ((c1) & 0x7F, (c2) & 0x7F);              \
3386         else                                                            \
3387           EMIT_TWO_BYTES ((c1) | 0x80, (c2) | 0x80);                    \
3388         CODING_ISO_SINGLE_SHIFTING (coding) = 0;                        \
3389         break;                                                          \
3390       }                                                                 \
3391     else if (id == CODING_ISO_INVOKED_CHARSET (coding, 0))              \
3392       {                                                                 \
3393         EMIT_TWO_ASCII_BYTES ((c1) & 0x7F, (c2) & 0x7F);                \
3394         break;                                                          \
3395       }                                                                 \
3396     else if (id == CODING_ISO_INVOKED_CHARSET (coding, 1))              \
3397       {                                                                 \
3398         EMIT_TWO_BYTES ((c1) | 0x80, (c2) | 0x80);                      \
3399         break;                                                          \
3400       }                                                                 \
3401     else                                                                \
3402       /* Since CHARSET is not yet invoked to any graphic planes, we     \
3403          must invoke it, or, at first, designate it to some graphic     \
3404          register.  Then repeat the loop to actually produce the        \
3405          character.  */                                                 \
3406       dst = encode_invocation_designation (charset, coding, dst,        \
3407                                            &produced_chars);            \
3408   } while (1)
3409
3410
3411 #define ENCODE_ISO_CHARACTER(charset, c)                                   \
3412   do {                                                                     \
3413     int code = ENCODE_CHAR ((charset),(c));                                \
3414                                                                            \
3415     if (CHARSET_DIMENSION (charset) == 1)                                  \
3416       ENCODE_ISO_CHARACTER_DIMENSION1 ((charset), code);                   \
3417     else                                                                   \
3418       ENCODE_ISO_CHARACTER_DIMENSION2 ((charset), code >> 8, code & 0xFF); \
3419   } while (0)
3420
3421
3422 /* Produce designation and invocation codes at a place pointed by DST
3423    to use CHARSET.  The element `spec.iso_2022' of *CODING is updated.
3424    Return new DST.  */
3425
3426 unsigned char *
3427 encode_invocation_designation (charset, coding, dst, p_nchars)
3428      struct charset *charset;
3429      struct coding_system *coding;
3430      unsigned char *dst;
3431      int *p_nchars;
3432 {
3433   int multibytep = coding->dst_multibyte;
3434   int produced_chars = *p_nchars;
3435   int reg;                      /* graphic register number */
3436   int id = CHARSET_ID (charset);
3437
3438   /* At first, check designations.  */
3439   for (reg = 0; reg < 4; reg++)
3440     if (id == CODING_ISO_DESIGNATION (coding, reg))
3441       break;
3442
3443   if (reg >= 4)
3444     {
3445       /* CHARSET is not yet designated to any graphic registers.  */
3446       /* At first check the requested designation.  */
3447       reg = CODING_ISO_REQUEST (coding, id);
3448       if (reg < 0)
3449         /* Since CHARSET requests no special designation, designate it
3450            to graphic register 0.  */
3451         reg = 0;
3452
3453       ENCODE_DESIGNATION (charset, reg, coding);
3454     }
3455
3456   if (CODING_ISO_INVOCATION (coding, 0) != reg
3457       && CODING_ISO_INVOCATION (coding, 1) != reg)
3458     {
3459       /* Since the graphic register REG is not invoked to any graphic
3460          planes, invoke it to graphic plane 0.  */
3461       switch (reg)
3462         {
3463         case 0:                 /* graphic register 0 */
3464           ENCODE_SHIFT_IN;
3465           break;
3466
3467         case 1:                 /* graphic register 1 */
3468           ENCODE_SHIFT_OUT;
3469           break;
3470
3471         case 2:                 /* graphic register 2 */
3472           if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT)
3473             ENCODE_SINGLE_SHIFT_2;
3474           else
3475             ENCODE_LOCKING_SHIFT_2;
3476           break;
3477
3478         case 3:                 /* graphic register 3 */
3479           if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT)
3480             ENCODE_SINGLE_SHIFT_3;
3481           else
3482             ENCODE_LOCKING_SHIFT_3;
3483           break;
3484         }
3485     }
3486
3487   *p_nchars = produced_chars;
3488   return dst;
3489 }
3490
3491 /* The following three macros produce codes for indicating direction
3492    of text.  */
3493 #define ENCODE_CONTROL_SEQUENCE_INTRODUCER                              \
3494   do {                                                                  \
3495     if (CODING_ISO_FLAGS (coding) == CODING_ISO_FLAG_SEVEN_BITS)        \
3496       EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, '[');                         \
3497     else                                                                \
3498       EMIT_ONE_BYTE (ISO_CODE_CSI);                                     \
3499   } while (0)
3500
3501
3502 #define ENCODE_DIRECTION_R2L()                  \
3503   do {                                          \
3504     ENCODE_CONTROL_SEQUENCE_INTRODUCER (dst);   \
3505     EMIT_TWO_ASCII_BYTES ('2', ']');            \
3506   } while (0)
3507
3508
3509 #define ENCODE_DIRECTION_L2R()                  \
3510   do {                                          \
3511     ENCODE_CONTROL_SEQUENCE_INTRODUCER (dst);   \
3512     EMIT_TWO_ASCII_BYTES ('0', ']');            \
3513   } while (0)
3514
3515
3516 /* Produce codes for designation and invocation to reset the graphic
3517    planes and registers to initial state.  */
3518 #define ENCODE_RESET_PLANE_AND_REGISTER()                               \
3519   do {                                                                  \
3520     int reg;                                                            \
3521     struct charset *charset;                                            \
3522                                                                         \
3523     if (CODING_ISO_INVOCATION (coding, 0) != 0)                         \
3524       ENCODE_SHIFT_IN;                                                  \
3525     for (reg = 0; reg < 4; reg++)                                       \
3526       if (CODING_ISO_INITIAL (coding, reg) >= 0                         \
3527           && (CODING_ISO_DESIGNATION (coding, reg)                      \
3528               != CODING_ISO_INITIAL (coding, reg)))                     \
3529         {                                                               \
3530           charset = CHARSET_FROM_ID (CODING_ISO_INITIAL (coding, reg)); \
3531           ENCODE_DESIGNATION (charset, reg, coding);                    \
3532         }                                                               \
3533   } while (0)
3534
3535
3536 /* Produce designation sequences of charsets in the line started from
3537    SRC to a place pointed by DST, and return updated DST.
3538
3539    If the current block ends before any end-of-line, we may fail to
3540    find all the necessary designations.  */
3541
3542 static unsigned char *
3543 encode_designation_at_bol (coding, charbuf, charbuf_end, dst)
3544      struct coding_system *coding;
3545      int *charbuf, *charbuf_end;
3546      unsigned char *dst;
3547 {
3548   struct charset *charset;
3549   /* Table of charsets to be designated to each graphic register.  */
3550   int r[4];
3551   int c, found = 0, reg;
3552   int produced_chars = 0;
3553   int multibytep = coding->dst_multibyte;
3554   Lisp_Object attrs;
3555   Lisp_Object charset_list;
3556
3557   attrs = CODING_ID_ATTRS (coding->id);
3558   charset_list = CODING_ATTR_CHARSET_LIST (attrs);
3559   if (EQ (charset_list, Qiso_2022))
3560     charset_list = Viso_2022_charset_list;
3561
3562   for (reg = 0; reg < 4; reg++)
3563     r[reg] = -1;
3564
3565   while (found < 4)
3566     {
3567       int id;
3568
3569       c = *charbuf++;
3570       if (c == '\n')
3571         break;
3572       charset = char_charset (c, charset_list, NULL);
3573       id = CHARSET_ID (charset);
3574       reg = CODING_ISO_REQUEST (coding, id);
3575       if (reg >= 0 && r[reg] < 0)
3576         {
3577           found++;
3578           r[reg] = id;
3579         }
3580     }
3581
3582   if (found)
3583     {
3584       for (reg = 0; reg < 4; reg++)
3585         if (r[reg] >= 0
3586             && CODING_ISO_DESIGNATION (coding, reg) != r[reg])
3587           ENCODE_DESIGNATION (CHARSET_FROM_ID (r[reg]), reg, coding);
3588     }
3589
3590   return dst;
3591 }
3592
3593 /* See the above "GENERAL NOTES on `encode_coding_XXX ()' functions".  */
3594
3595 static int
3596 encode_coding_iso_2022 (coding)
3597      struct coding_system *coding;
3598 {
3599   int multibytep = coding->dst_multibyte;
3600   int *charbuf = coding->charbuf;
3601   int *charbuf_end = charbuf + coding->charbuf_used;
3602   unsigned char *dst = coding->destination + coding->produced;
3603   unsigned char *dst_end = coding->destination + coding->dst_bytes;
3604   int safe_room = 16;
3605   int bol_designation
3606     = (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DESIGNATE_AT_BOL
3607        && CODING_ISO_BOL (coding));
3608   int produced_chars = 0;
3609   Lisp_Object attrs, eol_type, charset_list;
3610   int ascii_compatible;
3611   int c;
3612   int preferred_charset_id = -1;
3613
3614   CODING_GET_INFO (coding, attrs, eol_type, charset_list);
3615   setup_iso_safe_charsets (attrs);
3616   /* Charset list may have been changed.  */
3617   charset_list = CODING_ATTR_CHARSET_LIST (attrs);              \
3618   coding->safe_charsets
3619     = (char *) XSTRING (CODING_ATTR_SAFE_CHARSETS(attrs))->data;
3620
3621   ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
3622
3623   while (charbuf < charbuf_end)
3624     {
3625       ASSURE_DESTINATION (safe_room);
3626
3627       if (bol_designation)
3628         {
3629           unsigned char *dst_prev = dst;
3630
3631           /* We have to produce designation sequences if any now.  */
3632           dst = encode_designation_at_bol (coding, charbuf, charbuf_end, dst);
3633           bol_designation = 0;
3634           /* We are sure that designation sequences are all ASCII bytes.  */
3635           produced_chars += dst - dst_prev;
3636         }
3637
3638       c = *charbuf++;
3639
3640       if (c < 0)
3641         {
3642           /* Handle an annotation.  */
3643           switch (*charbuf)
3644             {
3645             case CODING_ANNOTATE_COMPOSITION_MASK:
3646               /* Not yet implemented.  */
3647               break;
3648             case CODING_ANNOTATE_CHARSET_MASK:
3649               preferred_charset_id = charbuf[3];
3650               if (preferred_charset_id >= 0
3651                   && NILP (Fmemq (make_number (preferred_charset_id),
3652                                   charset_list)))
3653                 preferred_charset_id = -1;
3654               break;
3655             default:
3656               abort ();
3657             }
3658           charbuf += -c - 1;
3659           continue;
3660         }
3661
3662       /* Now encode the character C.  */
3663       if (c < 0x20 || c == 0x7F)
3664         {
3665           if (c == '\n'
3666               || (c == '\r' && EQ (eol_type, Qmac)))
3667             {
3668               if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_RESET_AT_EOL)
3669                 ENCODE_RESET_PLANE_AND_REGISTER ();
3670               if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_INIT_AT_BOL)
3671                 {
3672                   int i;
3673
3674                   for (i = 0; i < 4; i++)
3675                     CODING_ISO_DESIGNATION (coding, i)
3676                       = CODING_ISO_INITIAL (coding, i);
3677                 }
3678               bol_designation
3679                 = CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DESIGNATE_AT_BOL;
3680             }
3681           else if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_RESET_AT_CNTL)
3682             ENCODE_RESET_PLANE_AND_REGISTER ();
3683           EMIT_ONE_ASCII_BYTE (c);
3684         }
3685       else if (ASCII_CHAR_P (c))
3686         {
3687           if (ascii_compatible)
3688             EMIT_ONE_ASCII_BYTE (c);
3689           else
3690             {
3691               struct charset *charset = CHARSET_FROM_ID (charset_ascii);
3692               ENCODE_ISO_CHARACTER (charset, c);
3693             }
3694         }
3695       else if (CHAR_BYTE8_P (c))
3696         {
3697           c = CHAR_TO_BYTE8 (c);
3698           EMIT_ONE_BYTE (c);
3699         }
3700       else
3701         {
3702           struct charset *charset;
3703
3704           if (preferred_charset_id >= 0)
3705             {
3706               charset = CHARSET_FROM_ID (preferred_charset_id);
3707               if (! CHAR_CHARSET_P (c, charset))
3708                 charset = char_charset (c, charset_list, NULL);
3709             }
3710           else
3711             charset = char_charset (c, charset_list, NULL);
3712           if (!charset)
3713             {
3714               if (coding->mode & CODING_MODE_SAFE_ENCODING)
3715                 {
3716                   c = CODING_INHIBIT_CHARACTER_SUBSTITUTION;
3717                   charset = CHARSET_FROM_ID (charset_ascii);
3718                 }
3719               else
3720                 {
3721                   c = coding->default_char;
3722                   charset = char_charset (c, charset_list, NULL);
3723                 }
3724             }
3725           ENCODE_ISO_CHARACTER (charset, c);
3726         }
3727     }
3728
3729   if (coding->mode & CODING_MODE_LAST_BLOCK
3730       && CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_RESET_AT_EOL)
3731     {
3732       ASSURE_DESTINATION (safe_room);
3733       ENCODE_RESET_PLANE_AND_REGISTER ();
3734     }
3735   coding->result = CODING_RESULT_SUCCESS;
3736   CODING_ISO_BOL (coding) = bol_designation;
3737   coding->produced_char += produced_chars;
3738   coding->produced = dst - coding->destination;
3739   return 0;
3740 }
3741
3742 \f
3743 /*** 8,9. SJIS and BIG5 handlers ***/
3744
3745 /* Although SJIS and BIG5 are not ISO's coding system, they are used
3746    quite widely.  So, for the moment, Emacs supports them in the bare
3747    C code.  But, in the future, they may be supported only by CCL.  */
3748
3749 /* SJIS is a coding system encoding three character sets: ASCII, right
3750    half of JISX0201-Kana, and JISX0208.  An ASCII character is encoded
3751    as is.  A character of charset katakana-jisx0201 is encoded by
3752    "position-code + 0x80".  A character of charset japanese-jisx0208
3753    is encoded in 2-byte but two position-codes are divided and shifted
3754    so that it fit in the range below.
3755
3756    --- CODE RANGE of SJIS ---
3757    (character set)      (range)
3758    ASCII                0x00 .. 0x7F
3759    KATAKANA-JISX0201    0xA0 .. 0xDF
3760    JISX0208 (1st byte)  0x81 .. 0x9F and 0xE0 .. 0xEF
3761             (2nd byte)  0x40 .. 0x7E and 0x80 .. 0xFC
3762    -------------------------------
3763
3764 */
3765
3766 /* BIG5 is a coding system encoding two character sets: ASCII and
3767    Big5.  An ASCII character is encoded as is.  Big5 is a two-byte
3768    character set and is encoded in two-byte.
3769
3770    --- CODE RANGE of BIG5 ---
3771    (character set)      (range)
3772    ASCII                0x00 .. 0x7F
3773    Big5 (1st byte)      0xA1 .. 0xFE
3774         (2nd byte)      0x40 .. 0x7E and 0xA1 .. 0xFE
3775    --------------------------
3776
3777   */
3778
3779 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
3780    Check if a text is encoded in SJIS.  If it is, return
3781    CATEGORY_MASK_SJIS, else return 0.  */
3782
3783 static int
3784 detect_coding_sjis (coding, detect_info)
3785      struct coding_system *coding;
3786      struct coding_detection_info *detect_info;
3787 {
3788   unsigned char *src = coding->source, *src_base = src;
3789   unsigned char *src_end = coding->source + coding->src_bytes;
3790   int multibytep = coding->src_multibyte;
3791   int consumed_chars = 0;
3792   int found = 0;
3793   int c;
3794   int incomplete;
3795
3796   detect_info->checked |= CATEGORY_MASK_SJIS;
3797   /* A coding system of this category is always ASCII compatible.  */
3798   src += coding->head_ascii;
3799
3800   while (1)
3801     {
3802       incomplete = 0;
3803       ONE_MORE_BYTE (c);
3804       incomplete = 1;
3805       if (c < 0x80)
3806         continue;
3807       if ((c >= 0x81 && c <= 0x9F) || (c >= 0xE0 && c <= 0xEF))
3808         {
3809           ONE_MORE_BYTE (c);
3810           if (c < 0x40 || c == 0x7F || c > 0xFC)
3811             break;
3812           found = CATEGORY_MASK_SJIS;
3813         }
3814       else if (c >= 0xA0 && c < 0xE0)
3815         found = CATEGORY_MASK_SJIS;
3816       else
3817         break;
3818     }
3819   detect_info->rejected |= CATEGORY_MASK_SJIS;
3820   return 0;
3821
3822  no_more_source:
3823   if (incomplete && coding->mode & CODING_MODE_LAST_BLOCK)
3824     {
3825       detect_info->rejected |= CATEGORY_MASK_SJIS;
3826       return 0;
3827     }
3828   detect_info->found |= found;
3829   return 1;
3830 }
3831
3832 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
3833    Check if a text is encoded in BIG5.  If it is, return
3834    CATEGORY_MASK_BIG5, else return 0.  */
3835
3836 static int
3837 detect_coding_big5 (coding, detect_info)
3838      struct coding_system *coding;
3839      struct coding_detection_info *detect_info;
3840 {
3841   unsigned char *src = coding->source, *src_base = src;
3842   unsigned char *src_end = coding->source + coding->src_bytes;
3843   int multibytep = coding->src_multibyte;
3844   int consumed_chars = 0;
3845   int found = 0;
3846   int c;
3847   int incomplete;
3848
3849   detect_info->checked |= CATEGORY_MASK_BIG5;
3850   /* A coding system of this category is always ASCII compatible.  */
3851   src += coding->head_ascii;
3852
3853   while (1)
3854     {
3855       incomplete = 0;
3856       ONE_MORE_BYTE (c);
3857       incomplete = 1;
3858       if (c < 0x80)
3859         continue;
3860       if (c >= 0xA1)
3861         {
3862           ONE_MORE_BYTE (c);
3863           if (c < 0x40 || (c >= 0x7F && c <= 0xA0))
3864             return 0;
3865           found = CATEGORY_MASK_BIG5;
3866         }
3867       else
3868         break;
3869     }
3870   detect_info->rejected |= CATEGORY_MASK_BIG5;
3871   return 0;
3872
3873  no_more_source:
3874   if (incomplete && coding->mode & CODING_MODE_LAST_BLOCK)
3875     {
3876       detect_info->rejected |= CATEGORY_MASK_BIG5;
3877       return 0;
3878     }
3879   detect_info->found |= found;
3880   return 1;
3881 }
3882
3883 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".
3884    If SJIS_P is 1, decode SJIS text, else decode BIG5 test.  */
3885
3886 static void
3887 decode_coding_sjis (coding)
3888      struct coding_system *coding;
3889 {
3890   unsigned char *src = coding->source + coding->consumed;
3891   unsigned char *src_end = coding->source + coding->src_bytes;
3892   unsigned char *src_base;
3893   int *charbuf = coding->charbuf;
3894   int *charbuf_end = charbuf + coding->charbuf_size - MAX_ANNOTATION_LENGTH;
3895   int consumed_chars = 0, consumed_chars_base;
3896   int multibytep = coding->src_multibyte;
3897   struct charset *charset_roman, *charset_kanji, *charset_kana;
3898   Lisp_Object attrs, eol_type, charset_list, val;
3899   int char_offset = coding->produced_char;
3900   int last_offset = char_offset;
3901   int last_id = charset_ascii;
3902
3903   CODING_GET_INFO (coding, attrs, eol_type, charset_list);
3904
3905   val = charset_list;
3906   charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
3907   charset_kana = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
3908   charset_kanji = CHARSET_FROM_ID (XINT (XCAR (val)));
3909
3910   while (1)
3911     {
3912       int c, c1;
3913
3914       src_base = src;
3915       consumed_chars_base = consumed_chars;
3916
3917       if (charbuf >= charbuf_end)
3918         break;
3919
3920       ONE_MORE_BYTE (c);
3921
3922       if (c == '\r')
3923         {
3924           if (EQ (eol_type, Qdos))
3925             {
3926               if (src == src_end)
3927                 {
3928                   coding->result = CODING_RESULT_INSUFFICIENT_SRC;
3929                   goto no_more_source;
3930                 }
3931               if (*src == '\n')
3932                 ONE_MORE_BYTE (c);
3933             }
3934           else if (EQ (eol_type, Qmac))
3935             c = '\n';
3936         }
3937       else
3938         {
3939           struct charset *charset;
3940
3941           if (c < 0x80)
3942             charset = charset_roman;
3943           else
3944             {
3945               if (c >= 0xF0)
3946                 goto invalid_code;
3947               if (c < 0xA0 || c >= 0xE0)
3948                 {
3949                   /* SJIS -> JISX0208 */
3950                   ONE_MORE_BYTE (c1);
3951                   if (c1 < 0x40 || c1 == 0x7F || c1 > 0xFC)
3952                     goto invalid_code;
3953                   c = (c << 8) | c1;
3954                   SJIS_TO_JIS (c);
3955                   charset = charset_kanji;
3956                 }
3957               else
3958                 {
3959                   /* SJIS -> JISX0201-Kana */
3960                   c &= 0x7F;
3961                   charset = charset_kana;
3962                 }
3963             }
3964           if (charset->id != charset_ascii
3965               && last_id != charset->id)
3966             {
3967               if (last_id != charset_ascii)
3968                 ADD_CHARSET_DATA (charbuf, last_offset, char_offset, last_id);
3969               last_id = charset->id;
3970               last_offset = char_offset;
3971             }
3972           CODING_DECODE_CHAR (coding, src, src_base, src_end, charset, c, c);
3973         }
3974       *charbuf++ = c;
3975       char_offset++;
3976       continue;
3977
3978     invalid_code:
3979       src = src_base;
3980       consumed_chars = consumed_chars_base;
3981       ONE_MORE_BYTE (c);
3982       *charbuf++ = ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
3983       char_offset++;
3984       coding->errors++;
3985     }
3986
3987  no_more_source:
3988   if (last_id != charset_ascii)
3989     ADD_CHARSET_DATA (charbuf, last_offset, char_offset, last_id);
3990   coding->consumed_char += consumed_chars_base;
3991   coding->consumed = src_base - coding->source;
3992   coding->charbuf_used = charbuf - coding->charbuf;
3993 }
3994
3995 static void
3996 decode_coding_big5 (coding)
3997      struct coding_system *coding;
3998 {
3999   unsigned char *src = coding->source + coding->consumed;
4000   unsigned char *src_end = coding->source + coding->src_bytes;
4001   unsigned char *src_base;
4002   int *charbuf = coding->charbuf;
4003   int *charbuf_end = charbuf + coding->charbuf_size - MAX_ANNOTATION_LENGTH;
4004   int consumed_chars = 0, consumed_chars_base;
4005   int multibytep = coding->src_multibyte;
4006   struct charset *charset_roman, *charset_big5;
4007   Lisp_Object attrs, eol_type, charset_list, val;
4008   int char_offset = coding->produced_char;
4009   int last_offset = char_offset;
4010   int last_id = charset_ascii;
4011
4012   CODING_GET_INFO (coding, attrs, eol_type, charset_list);
4013   val = charset_list;
4014   charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
4015   charset_big5 = CHARSET_FROM_ID (XINT (XCAR (val)));
4016
4017   while (1)
4018     {
4019       int c, c1;
4020
4021       src_base = src;
4022       consumed_chars_base = consumed_chars;
4023
4024       if (charbuf >= charbuf_end)
4025         break;
4026
4027       ONE_MORE_BYTE (c);
4028
4029       if (c == '\r')
4030         {
4031           if (EQ (eol_type, Qdos))
4032             {
4033               if (src == src_end)
4034                 {
4035                   coding->result = CODING_RESULT_INSUFFICIENT_SRC;
4036                   goto no_more_source;
4037                 }
4038               if (*src == '\n')
4039                 ONE_MORE_BYTE (c);
4040             }
4041           else if (EQ (eol_type, Qmac))
4042             c = '\n';
4043         }
4044       else
4045         {
4046           struct charset *charset;
4047           if (c < 0x80)
4048             charset = charset_roman;
4049           else
4050             {
4051               /* BIG5 -> Big5 */
4052               if (c < 0xA1 || c > 0xFE)
4053                 goto invalid_code;
4054               ONE_MORE_BYTE (c1);
4055               if (c1 < 0x40 || (c1 > 0x7E && c1 < 0xA1) || c1 > 0xFE)
4056                 goto invalid_code;
4057               c = c << 8 | c1;
4058               charset = charset_big5;
4059             }
4060           if (charset->id != charset_ascii
4061               && last_id != charset->id)
4062             {
4063               if (last_id != charset_ascii)
4064                 ADD_CHARSET_DATA (charbuf, last_offset, char_offset, last_id);
4065               last_id = charset->id;
4066               last_offset = char_offset;
4067             }
4068           CODING_DECODE_CHAR (coding, src, src_base, src_end, charset, c, c);
4069         }
4070
4071       *charbuf++ = c;
4072       char_offset++;
4073       continue;
4074
4075     invalid_code:
4076       src = src_base;
4077       consumed_chars = consumed_chars_base;
4078       ONE_MORE_BYTE (c);
4079       *charbuf++ = ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
4080       char_offset++;
4081       coding->errors++;
4082     }
4083
4084  no_more_source:
4085   if (last_id != charset_ascii)
4086     ADD_CHARSET_DATA (charbuf, last_offset, char_offset, last_id);
4087   coding->consumed_char += consumed_chars_base;
4088   coding->consumed = src_base - coding->source;
4089   coding->charbuf_used = charbuf - coding->charbuf;
4090 }
4091
4092 /* See the above "GENERAL NOTES on `encode_coding_XXX ()' functions".
4093    This function can encode charsets `ascii', `katakana-jisx0201',
4094    `japanese-jisx0208', `chinese-big5-1', and `chinese-big5-2'.  We
4095    are sure that all these charsets are registered as official charset
4096    (i.e. do not have extended leading-codes).  Characters of other
4097    charsets are produced without any encoding.  If SJIS_P is 1, encode
4098    SJIS text, else encode BIG5 text.  */
4099
4100 static int
4101 encode_coding_sjis (coding)
4102      struct coding_system *coding;
4103 {
4104   int multibytep = coding->dst_multibyte;
4105   int *charbuf = coding->charbuf;
4106   int *charbuf_end = charbuf + coding->charbuf_used;
4107   unsigned char *dst = coding->destination + coding->produced;
4108   unsigned char *dst_end = coding->destination + coding->dst_bytes;
4109   int safe_room = 4;
4110   int produced_chars = 0;
4111   Lisp_Object attrs, eol_type, charset_list, val;
4112   int ascii_compatible;
4113   struct charset *charset_roman, *charset_kanji, *charset_kana;
4114   int c;
4115
4116   CODING_GET_INFO (coding, attrs, eol_type, charset_list);
4117   val = charset_list;
4118   charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
4119   charset_kana = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
4120   charset_kanji = CHARSET_FROM_ID (XINT (XCAR (val)));
4121
4122   ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
4123
4124   while (charbuf < charbuf_end)
4125     {
4126       ASSURE_DESTINATION (safe_room);
4127       c = *charbuf++;
4128       /* Now encode the character C.  */
4129       if (ASCII_CHAR_P (c) && ascii_compatible)
4130         EMIT_ONE_ASCII_BYTE (c);
4131       else if (CHAR_BYTE8_P (c))
4132         {
4133           c = CHAR_TO_BYTE8 (c);
4134           EMIT_ONE_BYTE (c);
4135         }
4136       else
4137         {
4138           unsigned code;
4139           struct charset *charset = char_charset (c, charset_list, &code);
4140
4141           if (!charset)
4142             {
4143               if (coding->mode & CODING_MODE_SAFE_ENCODING)
4144                 {
4145                   code = CODING_INHIBIT_CHARACTER_SUBSTITUTION;
4146                   charset = CHARSET_FROM_ID (charset_ascii);
4147                 }
4148               else
4149                 {
4150                   c = coding->default_char;
4151                   charset = char_charset (c, charset_list, &code);
4152                 }
4153             }
4154           if (code == CHARSET_INVALID_CODE (charset))
4155             abort ();
4156           if (charset == charset_kanji)
4157             {
4158               int c1, c2;
4159               JIS_TO_SJIS (code);
4160               c1 = code >> 8, c2 = code & 0xFF;
4161               EMIT_TWO_BYTES (c1, c2);
4162             }
4163           else if (charset == charset_kana)
4164             EMIT_ONE_BYTE (code | 0x80);
4165           else
4166             EMIT_ONE_ASCII_BYTE (code & 0x7F);
4167         }
4168     }
4169   coding->result = CODING_RESULT_SUCCESS;
4170   coding->produced_char += produced_chars;
4171   coding->produced = dst - coding->destination;
4172   return 0;
4173 }
4174
4175 static int
4176 encode_coding_big5 (coding)
4177      struct coding_system *coding;
4178 {
4179   int multibytep = coding->dst_multibyte;
4180   int *charbuf = coding->charbuf;
4181   int *charbuf_end = charbuf + coding->charbuf_used;
4182   unsigned char *dst = coding->destination + coding->produced;
4183   unsigned char *dst_end = coding->destination + coding->dst_bytes;
4184   int safe_room = 4;
4185   int produced_chars = 0;
4186   Lisp_Object attrs, eol_type, charset_list, val;
4187   int ascii_compatible;
4188   struct charset *charset_roman, *charset_big5;
4189   int c;
4190
4191   CODING_GET_INFO (coding, attrs, eol_type, charset_list);
4192   val = charset_list;
4193   charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
4194   charset_big5 = CHARSET_FROM_ID (XINT (XCAR (val)));
4195   ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
4196
4197   while (charbuf < charbuf_end)
4198     {
4199       ASSURE_DESTINATION (safe_room);
4200       c = *charbuf++;
4201       /* Now encode the character C.  */
4202       if (ASCII_CHAR_P (c) && ascii_compatible)
4203         EMIT_ONE_ASCII_BYTE (c);
4204       else if (CHAR_BYTE8_P (c))
4205         {
4206           c = CHAR_TO_BYTE8 (c);
4207           EMIT_ONE_BYTE (c);
4208         }
4209       else
4210         {
4211           unsigned code;
4212           struct charset *charset = char_charset (c, charset_list, &code);
4213
4214           if (! charset)
4215             {
4216               if (coding->mode & CODING_MODE_SAFE_ENCODING)
4217                 {
4218                   code = CODING_INHIBIT_CHARACTER_SUBSTITUTION;
4219                   charset = CHARSET_FROM_ID (charset_ascii);
4220                 }
4221               else
4222                 {
4223                   c = coding->default_char;
4224                   charset = char_charset (c, charset_list, &code);
4225                 }
4226             }
4227           if (code == CHARSET_INVALID_CODE (charset))
4228             abort ();
4229           if (charset == charset_big5)
4230             {
4231               int c1, c2;
4232
4233               c1 = code >> 8, c2 = code & 0xFF;
4234               EMIT_TWO_BYTES (c1, c2);
4235             }
4236           else
4237             EMIT_ONE_ASCII_BYTE (code & 0x7F);
4238         }
4239     }
4240   coding->result = CODING_RESULT_SUCCESS;
4241   coding->produced_char += produced_chars;
4242   coding->produced = dst - coding->destination;
4243   return 0;
4244 }
4245
4246 \f
4247 /*** 10. CCL handlers ***/
4248
4249 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
4250    Check if a text is encoded in a coding system of which
4251    encoder/decoder are written in CCL program.  If it is, return
4252    CATEGORY_MASK_CCL, else return 0.  */
4253
4254 static int
4255 detect_coding_ccl (coding, detect_info)
4256      struct coding_system *coding;
4257      struct coding_detection_info *detect_info;
4258 {
4259   unsigned char *src = coding->source, *src_base = src;
4260   unsigned char *src_end = coding->source + coding->src_bytes;
4261   int multibytep = coding->src_multibyte;
4262   int consumed_chars = 0;
4263   int found = 0;
4264   unsigned char *valids = CODING_CCL_VALIDS (coding);
4265   int head_ascii = coding->head_ascii;
4266   Lisp_Object attrs;
4267
4268   detect_info->checked |= CATEGORY_MASK_CCL;
4269
4270   coding = &coding_categories[coding_category_ccl];
4271   attrs = CODING_ID_ATTRS (coding->id);
4272   if (! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
4273     src += head_ascii;
4274
4275   while (1)
4276     {
4277       int c;
4278       ONE_MORE_BYTE (c);
4279       if (! valids[c])
4280         break;
4281       if ((valids[c] > 1))
4282         found = CATEGORY_MASK_CCL;
4283     }
4284   detect_info->rejected |= CATEGORY_MASK_CCL;
4285   return 0;
4286
4287  no_more_source:
4288   detect_info->found |= found;
4289   return 1;
4290 }
4291
4292 static void
4293 decode_coding_ccl (coding)
4294      struct coding_system *coding;
4295 {
4296   const unsigned char *src = coding->source + coding->consumed;
4297   unsigned char *src_end = coding->source + coding->src_bytes;
4298   int *charbuf = coding->charbuf;
4299   int *charbuf_end = charbuf + coding->charbuf_size;
4300   int consumed_chars = 0;
4301   int multibytep = coding->src_multibyte;
4302   struct ccl_program ccl;
4303   int source_charbuf[1024];
4304   int source_byteidx[1024];
4305   Lisp_Object attrs, eol_type, charset_list, valids;
4306
4307   CODING_GET_INFO (coding, attrs, eol_type, charset_list);
4308   setup_ccl_program (&ccl, CODING_CCL_DECODER (coding));
4309
4310   while (src < src_end)
4311     {
4312       const unsigned char *p = src;
4313       int *source, *source_end;
4314       int i = 0;
4315
4316       if (multibytep)
4317         while (i < 1024 && p < src_end)
4318           {
4319             source_byteidx[i] = p - src;
4320             source_charbuf[i++] = STRING_CHAR_ADVANCE (p);
4321           }
4322       else
4323         while (i < 1024 && p < src_end)
4324           source_charbuf[i++] = *p++;
4325
4326       if (p == src_end && coding->mode & CODING_MODE_LAST_BLOCK)
4327         ccl.last_block = 1;
4328
4329       source = source_charbuf;
4330       source_end = source + i;
4331       while (source < source_end)
4332         {
4333           ccl_driver (&ccl, source, charbuf,
4334                       source_end - source, charbuf_end - charbuf,
4335                       charset_list);
4336           source += ccl.consumed;
4337           charbuf += ccl.produced;
4338           if (ccl.status != CCL_STAT_SUSPEND_BY_DST)
4339             break;
4340         }
4341       if (source < source_end)
4342         src += source_byteidx[source - source_charbuf];
4343       else
4344         src = p;
4345       consumed_chars += source - source_charbuf;
4346
4347       if (ccl.status != CCL_STAT_SUSPEND_BY_SRC
4348           && ccl.status != CODING_RESULT_INSUFFICIENT_SRC)
4349         break;
4350     }
4351
4352   switch (ccl.status)
4353     {
4354     case CCL_STAT_SUSPEND_BY_SRC:
4355       coding->result = CODING_RESULT_INSUFFICIENT_SRC;
4356       break;
4357     case CCL_STAT_SUSPEND_BY_DST:
4358       break;
4359     case CCL_STAT_QUIT:
4360     case CCL_STAT_INVALID_CMD:
4361       coding->result = CODING_RESULT_INTERRUPT;
4362       break;
4363     default:
4364       coding->result = CODING_RESULT_SUCCESS;
4365       break;
4366     }
4367   coding->consumed_char += consumed_chars;
4368   coding->consumed = src - coding->source;
4369   coding->charbuf_used = charbuf - coding->charbuf;
4370 }
4371
4372 static int
4373 encode_coding_ccl (coding)
4374      struct coding_system *coding;
4375 {
4376   struct ccl_program ccl;
4377   int multibytep = coding->dst_multibyte;
4378   int *charbuf = coding->charbuf;
4379   int *charbuf_end = charbuf + coding->charbuf_used;
4380   unsigned char *dst = coding->destination + coding->produced;
4381   unsigned char *dst_end = coding->destination + coding->dst_bytes;
4382   unsigned char *adjusted_dst_end = dst_end - 1;
4383   int destination_charbuf[1024];
4384   int i, produced_chars = 0;
4385   Lisp_Object attrs, eol_type, charset_list;
4386
4387   CODING_GET_INFO (coding, attrs, eol_type, charset_list);
4388   setup_ccl_program (&ccl, CODING_CCL_ENCODER (coding));
4389
4390   ccl.last_block = coding->mode & CODING_MODE_LAST_BLOCK;
4391   ccl.dst_multibyte = coding->dst_multibyte;
4392
4393   while (charbuf < charbuf_end && dst < adjusted_dst_end)
4394     {
4395       int dst_bytes = dst_end - dst;
4396       if (dst_bytes > 1024)
4397         dst_bytes = 1024;
4398
4399       ccl_driver (&ccl, charbuf, destination_charbuf,
4400                   charbuf_end - charbuf, dst_bytes, charset_list);
4401       charbuf += ccl.consumed;
4402       if (multibytep)
4403         for (i = 0; i < ccl.produced; i++)
4404           EMIT_ONE_BYTE (destination_charbuf[i] & 0xFF);
4405       else
4406         {
4407           for (i = 0; i < ccl.produced; i++)
4408             *dst++ = destination_charbuf[i] & 0xFF;
4409           produced_chars += ccl.produced;
4410         }
4411     }
4412
4413   switch (ccl.status)
4414     {
4415     case CCL_STAT_SUSPEND_BY_SRC:
4416       coding->result = CODING_RESULT_INSUFFICIENT_SRC;
4417       break;
4418     case CCL_STAT_SUSPEND_BY_DST:
4419       coding->result = CODING_RESULT_INSUFFICIENT_DST;
4420       break;
4421     case CCL_STAT_QUIT:
4422     case CCL_STAT_INVALID_CMD:
4423       coding->result = CODING_RESULT_INTERRUPT;
4424       break;
4425     default:
4426       coding->result = CODING_RESULT_SUCCESS;
4427       break;
4428     }
4429
4430   coding->produced_char += produced_chars;
4431   coding->produced = dst - coding->destination;
4432   return 0;
4433 }
4434
4435
4436 \f
4437 /*** 10, 11. no-conversion handlers ***/
4438
4439 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".  */
4440
4441 static void
4442 decode_coding_raw_text (coding)
4443      struct coding_system *coding;
4444 {
4445   coding->chars_at_source = 1;
4446   coding->consumed_char = 0;
4447   coding->consumed = 0;
4448   coding->result = CODING_RESULT_SUCCESS;
4449 }
4450
4451 static int
4452 encode_coding_raw_text (coding)
4453      struct coding_system *coding;
4454 {
4455   int multibytep = coding->dst_multibyte;
4456   int *charbuf = coding->charbuf;
4457   int *charbuf_end = coding->charbuf + coding->charbuf_used;
4458   unsigned char *dst = coding->destination + coding->produced;
4459   unsigned char *dst_end = coding->destination + coding->dst_bytes;
4460   int produced_chars = 0;
4461   int c;
4462
4463   if (multibytep)
4464     {
4465       int safe_room = MAX_MULTIBYTE_LENGTH * 2;
4466
4467       if (coding->src_multibyte)
4468         while (charbuf < charbuf_end)
4469           {
4470             ASSURE_DESTINATION (safe_room);
4471             c = *charbuf++;
4472             if (ASCII_CHAR_P (c))
4473               EMIT_ONE_ASCII_BYTE (c);
4474             else if (CHAR_BYTE8_P (c))
4475               {
4476                 c = CHAR_TO_BYTE8 (c);
4477                 EMIT_ONE_BYTE (c);
4478               }
4479             else
4480               {
4481                 unsigned char str[MAX_MULTIBYTE_LENGTH], *p0 = str, *p1 = str;
4482
4483                 CHAR_STRING_ADVANCE (c, p1);
4484                 while (p0 < p1)
4485                   {
4486                     EMIT_ONE_BYTE (*p0);
4487                     p0++;
4488                   }
4489               }
4490           }
4491       else
4492         while (charbuf < charbuf_end)
4493           {
4494             ASSURE_DESTINATION (safe_room);
4495             c = *charbuf++;
4496             EMIT_ONE_BYTE (c);
4497           }
4498     }
4499   else
4500     {
4501       if (coding->src_multibyte)
4502         {
4503           int safe_room = MAX_MULTIBYTE_LENGTH;
4504
4505           while (charbuf < charbuf_end)
4506             {
4507               ASSURE_DESTINATION (safe_room);
4508               c = *charbuf++;
4509               if (ASCII_CHAR_P (c))
4510                 *dst++ = c;
4511               else if (CHAR_BYTE8_P (c))
4512                 *dst++ = CHAR_TO_BYTE8 (c);
4513               else
4514                 CHAR_STRING_ADVANCE (c, dst);
4515               produced_chars++;
4516             }
4517         }
4518       else
4519         {
4520           ASSURE_DESTINATION (charbuf_end - charbuf);
4521           while (charbuf < charbuf_end && dst < dst_end)
4522             *dst++ = *charbuf++;
4523           produced_chars = dst - (coding->destination + coding->dst_bytes);
4524         }
4525     }
4526   coding->result = CODING_RESULT_SUCCESS;
4527   coding->produced_char += produced_chars;
4528   coding->produced = dst - coding->destination;
4529   return 0;
4530 }
4531
4532 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
4533    Check if a text is encoded in a charset-based coding system.  If it
4534    is, return 1, else return 0.  */
4535
4536 static int
4537 detect_coding_charset (coding, detect_info)
4538      struct coding_system *coding;
4539      struct coding_detection_info *detect_info;
4540 {
4541   unsigned char *src = coding->source, *src_base = src;
4542   unsigned char *src_end = coding->source + coding->src_bytes;
4543   int multibytep = coding->src_multibyte;
4544   int consumed_chars = 0;
4545   Lisp_Object attrs, valids;
4546   int found = 0;
4547
4548   detect_info->checked |= CATEGORY_MASK_CHARSET;
4549
4550   coding = &coding_categories[coding_category_charset];
4551   attrs = CODING_ID_ATTRS (coding->id);
4552   valids = AREF (attrs, coding_attr_charset_valids);
4553
4554   if (! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
4555     src += coding->head_ascii;
4556
4557   while (1)
4558     {
4559       int c;
4560
4561       ONE_MORE_BYTE (c);
4562       if (NILP (AREF (valids, c)))
4563         break;
4564       if (c >= 0x80)
4565         found = CATEGORY_MASK_CHARSET;
4566     }
4567   detect_info->rejected |= CATEGORY_MASK_CHARSET;
4568   return 0;
4569
4570  no_more_source:
4571   detect_info->found |= found;
4572   return 1;
4573 }
4574
4575 static void
4576 decode_coding_charset (coding)
4577      struct coding_system *coding;
4578 {
4579   unsigned char *src = coding->source + coding->consumed;
4580   unsigned char *src_end = coding->source + coding->src_bytes;
4581   unsigned char *src_base;
4582   int *charbuf = coding->charbuf;
4583   int *charbuf_end = charbuf + coding->charbuf_size - MAX_ANNOTATION_LENGTH;
4584   int consumed_chars = 0, consumed_chars_base;
4585   int multibytep = coding->src_multibyte;
4586   Lisp_Object attrs, eol_type, charset_list, valids;
4587   int char_offset = coding->produced_char;
4588   int last_offset = char_offset;
4589   int last_id = charset_ascii;
4590
4591   CODING_GET_INFO (coding, attrs, eol_type, charset_list);
4592   valids = AREF (attrs, coding_attr_charset_valids);
4593
4594   while (1)
4595     {
4596       int c;
4597
4598       src_base = src;
4599       consumed_chars_base = consumed_chars;
4600
4601       if (charbuf >= charbuf_end)
4602         break;
4603
4604       ONE_MORE_BYTE (c);
4605       if (c == '\r')
4606         {
4607           /* Here we assume that no charset maps '\r' to something
4608              else.  */
4609           if (EQ (eol_type, Qdos))
4610             {
4611               if (src == src_end)
4612                 {
4613                   coding->result = CODING_RESULT_INSUFFICIENT_SRC;
4614                   goto no_more_source;
4615                 }
4616               if (*src == '\n')
4617                 ONE_MORE_BYTE (c);
4618             }
4619           else if (EQ (eol_type, Qmac))
4620             c = '\n';
4621         }
4622       else
4623         {
4624           Lisp_Object val;
4625           struct charset *charset;
4626           int dim;
4627           int len = 1;
4628           unsigned code = c;
4629
4630           val = AREF (valids, c);
4631           if (NILP (val))
4632             goto invalid_code;
4633           if (INTEGERP (val))
4634             {
4635               charset = CHARSET_FROM_ID (XFASTINT (val));
4636               dim = CHARSET_DIMENSION (charset);
4637               while (len < dim)
4638                 {
4639                   ONE_MORE_BYTE (c);
4640                   code = (code << 8) | c;
4641                   len++;
4642                 }
4643               CODING_DECODE_CHAR (coding, src, src_base, src_end,
4644                                   charset, code, c);
4645             }
4646           else
4647             {
4648               /* VAL is a list of charset IDs.  It is assured that the
4649                  list is sorted by charset dimensions (smaller one
4650                  comes first).  */
4651               while (CONSP (val))
4652                 {
4653                   charset = CHARSET_FROM_ID (XFASTINT (XCAR (val)));
4654                   dim = CHARSET_DIMENSION (charset);
4655                   while (len < dim)
4656                     {
4657                       ONE_MORE_BYTE (c);
4658                       code = (code << 8) | c;
4659                       len++;
4660                     }
4661                   CODING_DECODE_CHAR (coding, src, src_base,
4662                                       src_end, charset, code, c);
4663                   if (c >= 0)
4664                     break;
4665                   val = XCDR (val);
4666                 }
4667             }
4668           if (c < 0)
4669             goto invalid_code;
4670           if (charset->id != charset_ascii
4671               && last_id != charset->id)
4672             {
4673               if (last_id != charset_ascii)
4674                 ADD_CHARSET_DATA (charbuf, last_offset, char_offset, last_id);
4675               last_id = charset->id;
4676               last_offset = char_offset;
4677             }
4678         }
4679       *charbuf++ = c;
4680       char_offset++;
4681       continue;
4682
4683     invalid_code:
4684       src = src_base;
4685       consumed_chars = consumed_chars_base;
4686       ONE_MORE_BYTE (c);
4687       *charbuf++ = ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
4688       char_offset++;
4689       coding->errors++;
4690     }
4691
4692  no_more_source:
4693   if (last_id != charset_ascii)
4694     ADD_CHARSET_DATA (charbuf, last_offset, char_offset, last_id);
4695   coding->consumed_char += consumed_chars_base;
4696   coding->consumed = src_base - coding->source;
4697   coding->charbuf_used = charbuf - coding->charbuf;
4698 }
4699
4700 static int
4701 encode_coding_charset (coding)
4702      struct coding_system *coding;
4703 {
4704   int multibytep = coding->dst_multibyte;
4705   int *charbuf = coding->charbuf;
4706   int *charbuf_end = charbuf + coding->charbuf_used;
4707   unsigned char *dst = coding->destination + coding->produced;
4708   unsigned char *dst_end = coding->destination + coding->dst_bytes;
4709   int safe_room = MAX_MULTIBYTE_LENGTH;
4710   int produced_chars = 0;
4711   Lisp_Object attrs, eol_type, charset_list;
4712   int ascii_compatible;
4713   int c;
4714
4715   CODING_GET_INFO (coding, attrs, eol_type, charset_list);
4716   ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
4717
4718   while (charbuf < charbuf_end)
4719     {
4720       struct charset *charset;
4721       unsigned code;
4722
4723       ASSURE_DESTINATION (safe_room);
4724       c = *charbuf++;
4725       if (ascii_compatible && ASCII_CHAR_P (c))
4726         EMIT_ONE_ASCII_BYTE (c);
4727       else if (CHAR_BYTE8_P (c))
4728         {
4729           c = CHAR_TO_BYTE8 (c);
4730           EMIT_ONE_BYTE (c);
4731         }
4732       else
4733         {
4734           charset = char_charset (c, charset_list, &code);
4735           if (charset)
4736             {
4737               if (CHARSET_DIMENSION (charset) == 1)
4738                 EMIT_ONE_BYTE (code);
4739               else if (CHARSET_DIMENSION (charset) == 2)
4740                 EMIT_TWO_BYTES (code >> 8, code & 0xFF);
4741               else if (CHARSET_DIMENSION (charset) == 3)
4742                 EMIT_THREE_BYTES (code >> 16, (code >> 8) & 0xFF, code & 0xFF);
4743               else
4744                 EMIT_FOUR_BYTES (code >> 24, (code >> 16) & 0xFF,
4745                                  (code >> 8) & 0xFF, code & 0xFF);
4746             }
4747           else
4748             {
4749               if (coding->mode & CODING_MODE_SAFE_ENCODING)
4750                 c = CODING_INHIBIT_CHARACTER_SUBSTITUTION;
4751               else
4752                 c = coding->default_char;
4753               EMIT_ONE_BYTE (c);
4754             }
4755         }
4756     }
4757
4758   coding->result = CODING_RESULT_SUCCESS;
4759   coding->produced_char += produced_chars;
4760   coding->produced = dst - coding->destination;
4761   return 0;
4762 }
4763
4764 \f
4765 /*** 7. C library functions ***/
4766
4767 /* Setup coding context CODING from information about CODING_SYSTEM.
4768    If CODING_SYSTEM is nil, `no-conversion' is assumed.  If
4769    CODING_SYSTEM is invalid, signal an error.  */
4770
4771 void
4772 setup_coding_system (coding_system, coding)
4773      Lisp_Object coding_system;
4774      struct coding_system *coding;
4775 {
4776   Lisp_Object attrs;
4777   Lisp_Object eol_type;
4778   Lisp_Object coding_type;
4779   Lisp_Object val;
4780
4781   if (NILP (coding_system))
4782     coding_system = Qno_conversion;
4783
4784   CHECK_CODING_SYSTEM_GET_ID (coding_system, coding->id);
4785
4786   attrs = CODING_ID_ATTRS (coding->id);
4787   eol_type = CODING_ID_EOL_TYPE (coding->id);
4788
4789   coding->mode = 0;
4790   coding->head_ascii = -1;
4791   coding->common_flags
4792     = (VECTORP (eol_type) ? CODING_REQUIRE_DETECTION_MASK : 0);
4793
4794   val = CODING_ATTR_SAFE_CHARSETS (attrs);
4795   coding->max_charset_id = XSTRING (val)->size - 1;
4796   coding->safe_charsets = (char *) XSTRING (val)->data;
4797   coding->default_char = XINT (CODING_ATTR_DEFAULT_CHAR (attrs));
4798
4799   coding_type = CODING_ATTR_TYPE (attrs);
4800   if (EQ (coding_type, Qundecided))
4801     {
4802       coding->detector = NULL;
4803       coding->decoder = decode_coding_raw_text;
4804       coding->encoder = encode_coding_raw_text;
4805       coding->common_flags |= CODING_REQUIRE_DETECTION_MASK;
4806     }
4807   else if (EQ (coding_type, Qiso_2022))
4808     {
4809       int i;
4810       int flags = XINT (AREF (attrs, coding_attr_iso_flags));
4811       enum coding_category category =  XINT (CODING_ATTR_CATEGORY (attrs));
4812
4813       /* Invoke graphic register 0 to plane 0.  */
4814       CODING_ISO_INVOCATION (coding, 0) = 0;
4815       /* Invoke graphic register 1 to plane 1 if we can use 8-bit.  */
4816       CODING_ISO_INVOCATION (coding, 1)
4817         = (flags & CODING_ISO_FLAG_SEVEN_BITS ? -1 : 1);
4818       /* Setup the initial status of designation.  */
4819       for (i = 0; i < 4; i++)
4820         CODING_ISO_DESIGNATION (coding, i) = CODING_ISO_INITIAL (coding, i);
4821       /* Not single shifting initially.  */
4822       CODING_ISO_SINGLE_SHIFTING (coding) = 0;
4823       /* Beginning of buffer should also be regarded as bol. */
4824       CODING_ISO_BOL (coding) = 1;
4825       coding->detector = detect_coding_iso_2022;
4826       coding->decoder = decode_coding_iso_2022;
4827       coding->encoder = encode_coding_iso_2022;
4828       if (flags & CODING_ISO_FLAG_SAFE)
4829         coding->mode |= CODING_MODE_SAFE_ENCODING;
4830       coding->common_flags
4831         |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK
4832             | CODING_REQUIRE_FLUSHING_MASK);
4833       if (flags & CODING_ISO_FLAG_COMPOSITION)
4834         coding->common_flags |= CODING_ANNOTATE_COMPOSITION_MASK;
4835       if (flags & CODING_ISO_FLAG_DESIGNATION)
4836         coding->common_flags |= CODING_ANNOTATE_CHARSET_MASK;
4837       if (flags & CODING_ISO_FLAG_FULL_SUPPORT)
4838         {
4839           setup_iso_safe_charsets (attrs);
4840           val = CODING_ATTR_SAFE_CHARSETS (attrs);
4841           coding->max_charset_id = XSTRING (val)->size - 1;
4842           coding->safe_charsets = (char *) XSTRING (val)->data;
4843         }
4844       CODING_ISO_FLAGS (coding) = flags;
4845     }
4846   else if (EQ (coding_type, Qcharset))
4847     {
4848       coding->detector = detect_coding_charset;
4849       coding->decoder = decode_coding_charset;
4850       coding->encoder = encode_coding_charset;
4851       coding->common_flags
4852         |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
4853     }
4854   else if (EQ (coding_type, Qutf_8))
4855     {
4856       coding->detector = detect_coding_utf_8;
4857       coding->decoder = decode_coding_utf_8;
4858       coding->encoder = encode_coding_utf_8;
4859       coding->common_flags
4860         |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
4861     }
4862   else if (EQ (coding_type, Qutf_16))
4863     {
4864       val = AREF (attrs, coding_attr_utf_16_bom);
4865       CODING_UTF_16_BOM (coding) = (CONSP (val) ? utf_16_detect_bom
4866                                     : EQ (val, Qt) ? utf_16_with_bom
4867                                     : utf_16_without_bom);
4868       val = AREF (attrs, coding_attr_utf_16_endian);
4869       CODING_UTF_16_ENDIAN (coding) = (NILP (val) ? utf_16_big_endian
4870                                        : utf_16_little_endian);
4871       CODING_UTF_16_SURROGATE (coding) = 0;
4872       coding->detector = detect_coding_utf_16;
4873       coding->decoder = decode_coding_utf_16;
4874       coding->encoder = encode_coding_utf_16;
4875       coding->common_flags
4876         |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
4877     }
4878   else if (EQ (coding_type, Qccl))
4879     {
4880       coding->detector = detect_coding_ccl;
4881       coding->decoder = decode_coding_ccl;
4882       coding->encoder = encode_coding_ccl;
4883       coding->common_flags
4884         |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK
4885             | CODING_REQUIRE_FLUSHING_MASK);
4886     }
4887   else if (EQ (coding_type, Qemacs_mule))
4888     {
4889       coding->detector = detect_coding_emacs_mule;
4890       coding->decoder = decode_coding_emacs_mule;
4891       coding->encoder = encode_coding_emacs_mule;
4892       coding->common_flags
4893         |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
4894       if (! NILP (AREF (attrs, coding_attr_emacs_mule_full))
4895           && ! EQ (CODING_ATTR_CHARSET_LIST (attrs), Vemacs_mule_charset_list))
4896         {
4897           Lisp_Object tail, safe_charsets;
4898           int max_charset_id = 0;
4899
4900           for (tail = Vemacs_mule_charset_list; CONSP (tail);
4901                tail = XCDR (tail))
4902             if (max_charset_id < XFASTINT (XCAR (tail)))
4903               max_charset_id = XFASTINT (XCAR (tail));
4904           safe_charsets = Fmake_string (make_number (max_charset_id + 1),
4905                                         make_number (255));
4906           for (tail = Vemacs_mule_charset_list; CONSP (tail);
4907                tail = XCDR (tail))
4908             XSTRING (safe_charsets)->data[XFASTINT (XCAR (tail))] = 0;
4909           coding->max_charset_id = max_charset_id;
4910           coding->safe_charsets = (char *) XSTRING (safe_charsets)->data;
4911         }
4912     }
4913   else if (EQ (coding_type, Qshift_jis))
4914     {
4915       coding->detector = detect_coding_sjis;
4916       coding->decoder = decode_coding_sjis;
4917       coding->encoder = encode_coding_sjis;
4918       coding->common_flags
4919         |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
4920     }
4921   else if (EQ (coding_type, Qbig5))
4922     {
4923       coding->detector = detect_coding_big5;
4924       coding->decoder = decode_coding_big5;
4925       coding->encoder = encode_coding_big5;
4926       coding->common_flags
4927         |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
4928     }
4929   else                          /* EQ (coding_type, Qraw_text) */
4930     {
4931       coding->detector = NULL;
4932       coding->decoder = decode_coding_raw_text;
4933       coding->encoder = encode_coding_raw_text;
4934       coding->common_flags |= CODING_FOR_UNIBYTE_MASK;
4935     }
4936
4937   return;
4938 }
4939
4940 /* Return raw-text or one of its subsidiaries that has the same
4941    eol_type as CODING-SYSTEM.  */
4942
4943 Lisp_Object
4944 raw_text_coding_system (coding_system)
4945      Lisp_Object coding_system;
4946 {
4947   Lisp_Object spec, attrs;
4948   Lisp_Object eol_type, raw_text_eol_type;
4949
4950   spec = CODING_SYSTEM_SPEC (coding_system);
4951   attrs = AREF (spec, 0);
4952
4953   if (EQ (CODING_ATTR_TYPE (attrs), Qraw_text))
4954     return coding_system;
4955
4956   eol_type = AREF (spec, 2);
4957   if (VECTORP (eol_type))
4958     return Qraw_text;
4959   spec = CODING_SYSTEM_SPEC (Qraw_text);
4960   raw_text_eol_type = AREF (spec, 2);
4961   return (EQ (eol_type, Qunix) ? AREF (raw_text_eol_type, 0)
4962           : EQ (eol_type, Qdos) ? AREF (raw_text_eol_type, 1)
4963           : AREF (raw_text_eol_type, 2));
4964 }
4965
4966
4967 /* If CODING_SYSTEM doesn't specify end-of-line format but PARENT
4968    does, return one of the subsidiary that has the same eol-spec as
4969    PARENT.  Otherwise, return CODING_SYSTEM.  */
4970
4971 Lisp_Object
4972 coding_inherit_eol_type (coding_system, parent)
4973      Lisp_Object coding_system, parent;
4974 {
4975   Lisp_Object spec, attrs, eol_type;
4976
4977   spec = CODING_SYSTEM_SPEC (coding_system);
4978   attrs = AREF (spec, 0);
4979   eol_type = AREF (spec, 2);
4980   if (VECTORP (eol_type))
4981     {
4982       Lisp_Object parent_spec;
4983       Lisp_Object parent_eol_type;
4984
4985       parent_spec
4986         = CODING_SYSTEM_SPEC (buffer_defaults.buffer_file_coding_system);
4987       parent_eol_type = AREF (parent_spec, 2);
4988       if (EQ (parent_eol_type, Qunix))
4989         coding_system = AREF (eol_type, 0);
4990       else if (EQ (parent_eol_type, Qdos))
4991         coding_system = AREF (eol_type, 1);
4992       else if (EQ (parent_eol_type, Qmac))
4993         coding_system = AREF (eol_type, 2);
4994     }
4995   return coding_system;
4996 }
4997
4998 /* Emacs has a mechanism to automatically detect a coding system if it
4999    is one of Emacs' internal format, ISO2022, SJIS, and BIG5.  But,
5000    it's impossible to distinguish some coding systems accurately
5001    because they use the same range of codes.  So, at first, coding
5002    systems are categorized into 7, those are:
5003
5004    o coding-category-emacs-mule
5005
5006         The category for a coding system which has the same code range
5007         as Emacs' internal format.  Assigned the coding-system (Lisp
5008         symbol) `emacs-mule' by default.
5009
5010    o coding-category-sjis
5011
5012         The category for a coding system which has the same code range
5013         as SJIS.  Assigned the coding-system (Lisp
5014         symbol) `japanese-shift-jis' by default.
5015
5016    o coding-category-iso-7
5017
5018         The category for a coding system which has the same code range
5019         as ISO2022 of 7-bit environment.  This doesn't use any locking
5020         shift and single shift functions.  This can encode/decode all
5021         charsets.  Assigned the coding-system (Lisp symbol)
5022         `iso-2022-7bit' by default.
5023
5024    o coding-category-iso-7-tight
5025
5026         Same as coding-category-iso-7 except that this can
5027         encode/decode only the specified charsets.
5028
5029    o coding-category-iso-8-1
5030
5031         The category for a coding system which has the same code range
5032         as ISO2022 of 8-bit environment and graphic plane 1 used only
5033         for DIMENSION1 charset.  This doesn't use any locking shift
5034         and single shift functions.  Assigned the coding-system (Lisp
5035         symbol) `iso-latin-1' by default.
5036
5037    o coding-category-iso-8-2
5038
5039         The category for a coding system which has the same code range
5040         as ISO2022 of 8-bit environment and graphic plane 1 used only
5041         for DIMENSION2 charset.  This doesn't use any locking shift
5042         and single shift functions.  Assigned the coding-system (Lisp
5043         symbol) `japanese-iso-8bit' by default.
5044
5045    o coding-category-iso-7-else
5046
5047         The category for a coding system which has the same code range
5048         as ISO2022 of 7-bit environemnt but uses locking shift or
5049         single shift functions.  Assigned the coding-system (Lisp
5050         symbol) `iso-2022-7bit-lock' by default.
5051
5052    o coding-category-iso-8-else
5053
5054         The category for a coding system which has the same code range
5055         as ISO2022 of 8-bit environemnt but uses locking shift or
5056         single shift functions.  Assigned the coding-system (Lisp
5057         symbol) `iso-2022-8bit-ss2' by default.
5058
5059    o coding-category-big5
5060
5061         The category for a coding system which has the same code range
5062         as BIG5.  Assigned the coding-system (Lisp symbol)
5063         `cn-big5' by default.
5064
5065    o coding-category-utf-8
5066
5067         The category for a coding system which has the same code range
5068         as UTF-8 (cf. RFC2279).  Assigned the coding-system (Lisp
5069         symbol) `utf-8' by default.
5070
5071    o coding-category-utf-16-be
5072
5073         The category for a coding system in which a text has an
5074         Unicode signature (cf. Unicode Standard) in the order of BIG
5075         endian at the head.  Assigned the coding-system (Lisp symbol)
5076         `utf-16-be' by default.
5077
5078    o coding-category-utf-16-le
5079
5080         The category for a coding system in which a text has an
5081         Unicode signature (cf. Unicode Standard) in the order of
5082         LITTLE endian at the head.  Assigned the coding-system (Lisp
5083         symbol) `utf-16-le' by default.
5084
5085    o coding-category-ccl
5086
5087         The category for a coding system of which encoder/decoder is
5088         written in CCL programs.  The default value is nil, i.e., no
5089         coding system is assigned.
5090
5091    o coding-category-binary
5092
5093         The category for a coding system not categorized in any of the
5094         above.  Assigned the coding-system (Lisp symbol)
5095         `no-conversion' by default.
5096
5097    Each of them is a Lisp symbol and the value is an actual
5098    `coding-system's (this is also a Lisp symbol) assigned by a user.
5099    What Emacs does actually is to detect a category of coding system.
5100    Then, it uses a `coding-system' assigned to it.  If Emacs can't
5101    decide only one possible category, it selects a category of the
5102    highest priority.  Priorities of categories are also specified by a
5103    user in a Lisp variable `coding-category-list'.
5104
5105 */
5106
5107 #define EOL_SEEN_NONE   0
5108 #define EOL_SEEN_LF     1
5109 #define EOL_SEEN_CR     2
5110 #define EOL_SEEN_CRLF   4
5111
5112 /* Detect how end-of-line of a text of length SRC_BYTES pointed by
5113    SOURCE is encoded.  If CATEGORY is one of
5114    coding_category_utf_16_XXXX, assume that CR and LF are encoded by
5115    two-byte, else they are encoded by one-byte.
5116
5117    Return one of EOL_SEEN_XXX.  */
5118
5119 #define MAX_EOL_CHECK_COUNT 3
5120
5121 static int
5122 detect_eol (source, src_bytes, category)
5123      unsigned char *source;
5124      EMACS_INT src_bytes;
5125      enum coding_category category;
5126 {
5127   unsigned char *src = source, *src_end = src + src_bytes;
5128   unsigned char c;
5129   int total  = 0;
5130   int eol_seen = EOL_SEEN_NONE;
5131
5132   if ((1 << category) & CATEGORY_MASK_UTF_16)
5133     {
5134       int msb, lsb;
5135
5136       msb = category == (coding_category_utf_16_le
5137                          | coding_category_utf_16_le_nosig);
5138       lsb = 1 - msb;
5139
5140       while (src + 1 < src_end)
5141         {
5142           c = src[lsb];
5143           if (src[msb] == 0 && (c == '\n' || c == '\r'))
5144             {
5145               int this_eol;
5146
5147               if (c == '\n')
5148                 this_eol = EOL_SEEN_LF;
5149               else if (src + 3 >= src_end
5150                        || src[msb + 2] != 0
5151                        || src[lsb + 2] != '\n')
5152                 this_eol = EOL_SEEN_CR;
5153               else
5154                 this_eol = EOL_SEEN_CRLF;
5155
5156               if (eol_seen == EOL_SEEN_NONE)
5157                 /* This is the first end-of-line.  */
5158                 eol_seen = this_eol;
5159               else if (eol_seen != this_eol)
5160                 {
5161                   /* The found type is different from what found before.  */
5162                   eol_seen = EOL_SEEN_LF;
5163                   break;
5164                 }
5165               if (++total == MAX_EOL_CHECK_COUNT)
5166                 break;
5167             }
5168           src += 2;
5169         }
5170     }
5171   else
5172     {
5173       while (src < src_end)
5174         {
5175           c = *src++;
5176           if (c == '\n' || c == '\r')
5177             {
5178               int this_eol;
5179
5180               if (c == '\n')
5181                 this_eol = EOL_SEEN_LF;
5182               else if (src >= src_end || *src != '\n')
5183                 this_eol = EOL_SEEN_CR;
5184               else
5185                 this_eol = EOL_SEEN_CRLF, src++;
5186
5187               if (eol_seen == EOL_SEEN_NONE)
5188                 /* This is the first end-of-line.  */
5189                 eol_seen = this_eol;
5190               else if (eol_seen != this_eol)
5191                 {
5192                   /* The found type is different from what found before.  */
5193                   eol_seen = EOL_SEEN_LF;
5194                   break;
5195                 }
5196               if (++total == MAX_EOL_CHECK_COUNT)
5197                 break;
5198             }
5199         }
5200     }
5201   return eol_seen;
5202 }
5203
5204
5205 static void
5206 adjust_coding_eol_type (coding, eol_seen)
5207      struct coding_system *coding;
5208      int eol_seen;
5209 {
5210   Lisp_Object eol_type;
5211
5212   eol_type = CODING_ID_EOL_TYPE (coding->id);
5213   if (eol_seen & EOL_SEEN_LF)
5214     coding->id = CODING_SYSTEM_ID (AREF (eol_type, 0));
5215   else if (eol_seen & EOL_SEEN_CRLF)
5216     coding->id = CODING_SYSTEM_ID (AREF (eol_type, 1));
5217   else if (eol_seen & EOL_SEEN_CR)
5218     coding->id = CODING_SYSTEM_ID (AREF (eol_type, 2));
5219 }
5220
5221 /* Detect how a text specified in CODING is encoded.  If a coding
5222    system is detected, update fields of CODING by the detected coding
5223    system.  */
5224
5225 void
5226 detect_coding (coding)
5227      struct coding_system *coding;
5228 {
5229   unsigned char *src, *src_end;
5230   Lisp_Object attrs, coding_type;
5231
5232   coding->consumed = coding->consumed_char = 0;
5233   coding->produced = coding->produced_char = 0;
5234   coding_set_source (coding);
5235
5236   src_end = coding->source + coding->src_bytes;
5237
5238   /* If we have not yet decided the text encoding type, detect it
5239      now.  */
5240   if (EQ (CODING_ATTR_TYPE (CODING_ID_ATTRS (coding->id)), Qundecided))
5241     {
5242       int c, i;
5243
5244       for (src = coding->source; src < src_end; src++)
5245         {
5246           c = *src;
5247           if (c & 0x80 || (c < 0x20 && (c == ISO_CODE_ESC
5248                                         || c == ISO_CODE_SI
5249                                         || c == ISO_CODE_SO)))
5250             break;
5251         }
5252       coding->head_ascii = src - (coding->source + coding->consumed);
5253
5254       if (coding->head_ascii < coding->src_bytes)
5255         {
5256           struct coding_detection_info detect_info;
5257           enum coding_category category;
5258           struct coding_system *this;
5259
5260           detect_info.checked = detect_info.found = detect_info.rejected = 0;
5261           for (i = 0; i < coding_category_raw_text; i++)
5262             {
5263               category = coding_priorities[i];
5264               this = coding_categories + category;
5265               if (this->id < 0)
5266                 {
5267                   /* No coding system of this category is defined.  */
5268                   detect_info.rejected |= (1 << category);
5269                 }
5270               else if (category >= coding_category_raw_text)
5271                 continue;
5272               else if (detect_info.checked & (1 << category))
5273                 {
5274                   if (detect_info.found & (1 << category))
5275                     break;
5276                 }
5277               else if ((*(this->detector)) (coding, &detect_info)
5278                        && detect_info.found & (1 << category))
5279                 break;
5280             }
5281           if (i < coding_category_raw_text)
5282             setup_coding_system (CODING_ID_NAME (this->id), coding);
5283           else if (detect_info.rejected == CATEGORY_MASK_ANY)
5284             setup_coding_system (Qraw_text, coding);
5285           else if (detect_info.rejected)
5286             for (i = 0; i < coding_category_raw_text; i++)
5287               if (! (detect_info.rejected & (1 << coding_priorities[i])))
5288                 {
5289                   this = coding_categories + coding_priorities[i];
5290                   setup_coding_system (CODING_ID_NAME (this->id), coding);
5291                   break;
5292                 }
5293         }
5294     }
5295
5296   attrs = CODING_ID_ATTRS (coding->id);
5297   coding_type = CODING_ATTR_TYPE (attrs);
5298
5299   /* If we have not yet decided the EOL type, detect it now.  But, the
5300      detection is impossible for a CCL based coding system, in which
5301      case, we detct the EOL type after decoding.  */
5302   if (VECTORP (CODING_ID_EOL_TYPE (coding->id))
5303       && ! EQ (coding_type, Qccl))
5304     {
5305       int eol_seen = detect_eol (coding->source, coding->src_bytes,
5306                                  XINT (CODING_ATTR_CATEGORY (attrs)));
5307
5308       if (eol_seen != EOL_SEEN_NONE)
5309         adjust_coding_eol_type (coding, eol_seen);
5310     }
5311 }
5312
5313
5314 static void
5315 decode_eol (coding)
5316      struct coding_system *coding;
5317 {
5318   if (VECTORP (CODING_ID_EOL_TYPE (coding->id)))
5319     {
5320       unsigned char *p = CHAR_POS_ADDR (coding->dst_pos);
5321       unsigned char *pend = p + coding->produced;
5322       int eol_seen = EOL_SEEN_NONE;
5323
5324       for (; p < pend; p++)
5325         {
5326           if (*p == '\n')
5327             eol_seen |= EOL_SEEN_LF;
5328           else if (*p == '\r')
5329             {
5330               if (p + 1 < pend && *(p + 1) == '\n')
5331                 {
5332                   eol_seen |= EOL_SEEN_CRLF;
5333                   p++;
5334                 }
5335               else
5336                 eol_seen |= EOL_SEEN_CR;
5337             }
5338         }
5339       if (eol_seen != EOL_SEEN_NONE)
5340         adjust_coding_eol_type (coding, eol_seen);
5341     }
5342
5343   if (EQ (CODING_ID_EOL_TYPE (coding->id), Qmac))
5344     {
5345       unsigned char *p = CHAR_POS_ADDR (coding->dst_pos);
5346       unsigned char *pend = p + coding->produced;
5347
5348       for (; p < pend; p++)
5349         if (*p == '\r')
5350           *p = '\n';
5351     }
5352   else if (EQ (CODING_ID_EOL_TYPE (coding->id), Qdos))
5353     {
5354       unsigned char *p, *pbeg, *pend;
5355       Lisp_Object undo_list;
5356
5357       move_gap_both (coding->dst_pos + coding->produced_char,
5358                      coding->dst_pos_byte + coding->produced);
5359       undo_list = current_buffer->undo_list;
5360       current_buffer->undo_list = Qt;
5361       del_range_2 (coding->dst_pos, coding->dst_pos_byte, GPT, GPT_BYTE, 0);
5362       current_buffer->undo_list = undo_list;
5363       pbeg = GPT_ADDR;
5364       pend = pbeg + coding->produced;
5365
5366       for (p = pend - 1; p >= pbeg; p--)
5367         if (*p == '\r')
5368           {
5369             safe_bcopy ((char *) (p + 1), (char *) p, pend - p - 1);
5370             pend--;
5371           }
5372       coding->produced_char -= coding->produced - (pend - pbeg);
5373       coding->produced = pend - pbeg;
5374       insert_from_gap (coding->produced_char, coding->produced);
5375     }
5376 }
5377
5378 static void
5379 translate_chars (coding, table)
5380      struct coding_system *coding;
5381      Lisp_Object table;
5382 {
5383   int *charbuf = coding->charbuf;
5384   int *charbuf_end = charbuf + coding->charbuf_used;
5385   int c;
5386
5387   if (coding->chars_at_source)
5388     return;
5389
5390   while (charbuf < charbuf_end)
5391     {
5392       c = *charbuf;
5393       if (c < 0)
5394         charbuf += c;
5395       else
5396         *charbuf++ = translate_char (table, c);
5397     }
5398 }
5399
5400 static int
5401 produce_chars (coding)
5402      struct coding_system *coding;
5403 {
5404   unsigned char *dst = coding->destination + coding->produced;
5405   unsigned char *dst_end = coding->destination + coding->dst_bytes;
5406   int produced;
5407   int produced_chars = 0;
5408
5409   if (! coding->chars_at_source)
5410     {
5411       /* Characters are in coding->charbuf.  */
5412       int *buf = coding->charbuf;
5413       int *buf_end = buf + coding->charbuf_used;
5414       unsigned char *adjusted_dst_end;
5415
5416       if (BUFFERP (coding->src_object)
5417           && EQ (coding->src_object, coding->dst_object))
5418         dst_end = coding->source + coding->consumed;
5419       adjusted_dst_end = dst_end - MAX_MULTIBYTE_LENGTH;
5420
5421       while (buf < buf_end)
5422         {
5423           int c = *buf++;
5424
5425           if (dst >= adjusted_dst_end)
5426             {
5427               dst = alloc_destination (coding,
5428                                        buf_end - buf + MAX_MULTIBYTE_LENGTH,
5429                                        dst);
5430               dst_end = coding->destination + coding->dst_bytes;
5431               adjusted_dst_end = dst_end - MAX_MULTIBYTE_LENGTH;
5432             }
5433           if (c >= 0)
5434             {
5435               if (coding->dst_multibyte
5436                   || ! CHAR_BYTE8_P (c))
5437                 CHAR_STRING_ADVANCE (c, dst);
5438               else
5439                 *dst++ = CHAR_TO_BYTE8 (c);
5440               produced_chars++;
5441             }
5442           else
5443             /* This is an annotation datum.  */
5444             buf -= c + 1;
5445         }
5446     }
5447   else
5448     {
5449       unsigned char *src = coding->source;
5450       unsigned char *src_end = src + coding->src_bytes;
5451       Lisp_Object eol_type;
5452
5453       eol_type = CODING_ID_EOL_TYPE (coding->id);
5454
5455       if (coding->src_multibyte != coding->dst_multibyte)
5456         {
5457           if (coding->src_multibyte)
5458             {
5459               int multibytep = 1;
5460               int consumed_chars;
5461
5462               while (1)
5463                 {
5464                   unsigned char *src_base = src;
5465                   int c;
5466
5467                   ONE_MORE_BYTE (c);
5468                   if (c == '\r')
5469                     {
5470                       if (EQ (eol_type, Qdos))
5471                         {
5472                           if (src == src_end)
5473                             {
5474                               coding->result = CODING_RESULT_INSUFFICIENT_SRC;
5475                               goto no_more_source;
5476                             }
5477                           if (*src == '\n')
5478                             c = *src++;
5479                         }
5480                       else if (EQ (eol_type, Qmac))
5481                         c = '\n';
5482                     }
5483                   if (dst == dst_end)
5484                     {
5485                       coding->consumed = src - coding->source;
5486
5487                     if (EQ (coding->src_object, coding->dst_object))
5488                       dst_end = src;
5489                     if (dst == dst_end)
5490                       {
5491                         dst = alloc_destination (coding, src_end - src + 1,
5492                                                  dst);
5493                         dst_end = coding->destination + coding->dst_bytes;
5494                         coding_set_source (coding);
5495                         src = coding->source + coding->consumed;
5496                         src_end = coding->source + coding->src_bytes;
5497                       }
5498                     }
5499                   *dst++ = c;
5500                   produced_chars++;
5501                 }
5502             no_more_source:
5503               ;
5504             }
5505           else
5506             while (src < src_end)
5507               {
5508                 int multibytep = 1;
5509                 int c = *src++;
5510
5511                 if (c == '\r')
5512                   {
5513                     if (EQ (eol_type, Qdos))
5514                       {
5515                         if (src < src_end
5516                             && *src == '\n')
5517                           c = *src++;
5518                       }
5519                     else if (EQ (eol_type, Qmac))
5520                       c = '\n';
5521                   }
5522                 if (dst >= dst_end - 1)
5523                   {
5524                     coding->consumed = src - coding->source;
5525
5526                     if (EQ (coding->src_object, coding->dst_object))
5527                       dst_end = src;
5528                     if (dst >= dst_end - 1)
5529                       {
5530                         dst = alloc_destination (coding, src_end - src + 2,
5531                                                  dst);
5532                         dst_end = coding->destination + coding->dst_bytes;
5533                         coding_set_source (coding);
5534                         src = coding->source + coding->consumed;
5535                         src_end = coding->source + coding->src_bytes;
5536                       }
5537                   }
5538                 EMIT_ONE_BYTE (c);
5539               }
5540         }
5541       else
5542         {
5543           if (!EQ (coding->src_object, coding->dst_object))
5544             {
5545               int require = coding->src_bytes - coding->dst_bytes;
5546
5547               if (require > 0)
5548                 {
5549                   EMACS_INT offset = src - coding->source;
5550
5551                   dst = alloc_destination (coding, require, dst);
5552                   coding_set_source (coding);
5553                   src = coding->source + offset;
5554                   src_end = coding->source + coding->src_bytes;
5555                 }
5556             }
5557           produced_chars = coding->src_chars;
5558           while (src < src_end)
5559             {
5560               int c = *src++;
5561
5562               if (c == '\r')
5563                 {
5564                   if (EQ (eol_type, Qdos))
5565                     {
5566                       if (src < src_end
5567                           && *src == '\n')
5568                         c = *src++;
5569                       produced_chars--;
5570                     }
5571                   else if (EQ (eol_type, Qmac))
5572                     c = '\n';
5573                 }
5574               *dst++ = c;
5575             }
5576         }
5577       coding->consumed = coding->src_bytes;
5578       coding->consumed_char = coding->src_chars;
5579     }
5580
5581   produced = dst - (coding->destination + coding->produced);
5582   if (BUFFERP (coding->dst_object))
5583     insert_from_gap (produced_chars, produced);
5584   coding->produced += produced;
5585   coding->produced_char += produced_chars;
5586   return produced_chars;
5587 }
5588
5589 /* Compose text in CODING->object according to the annotation data at
5590    CHARBUF.  CHARBUF is an array:
5591      [ -LENGTH ANNOTATION_MASK FROM TO METHOD COMP_LEN [ COMPONENTS... ] ]
5592  */
5593
5594 static INLINE void
5595 produce_composition (coding, charbuf)
5596      struct coding_system *coding;
5597      int *charbuf;
5598 {
5599   int len;
5600   EMACS_INT from, to;
5601   enum composition_method method;
5602   Lisp_Object components;
5603
5604   len = -charbuf[0];
5605   from = coding->dst_pos + charbuf[2];
5606   to = coding->dst_pos + charbuf[3];
5607   method = (enum composition_method) (charbuf[4]);
5608
5609   if (method == COMPOSITION_RELATIVE)
5610     components = Qnil;
5611   else
5612     {
5613       Lisp_Object args[MAX_COMPOSITION_COMPONENTS * 2 - 1];
5614       int i;
5615
5616       len -= 5;
5617       charbuf += 5;
5618       for (i = 0; i < len; i++)
5619         args[i] = make_number (charbuf[i]);
5620       components = (method == COMPOSITION_WITH_ALTCHARS
5621                     ? Fstring (len, args) : Fvector (len, args));
5622     }
5623   compose_text (from, to, components, Qnil, coding->dst_object);
5624 }
5625
5626
5627 /* Put `charset' property on text in CODING->object according to
5628    the annotation data at CHARBUF.  CHARBUF is an array:
5629      [ -LENGTH ANNOTATION_MASK FROM TO CHARSET-ID ]
5630  */
5631
5632 static INLINE void
5633 produce_charset (coding, charbuf)
5634      struct coding_system *coding;
5635      int *charbuf;
5636 {
5637   EMACS_INT from = coding->dst_pos + charbuf[2];
5638   EMACS_INT to = coding->dst_pos + charbuf[3];
5639   struct charset *charset = CHARSET_FROM_ID (charbuf[4]);
5640
5641   Fput_text_property (make_number (from), make_number (to),
5642                       Qcharset, CHARSET_NAME (charset),
5643                       coding->dst_object);
5644 }
5645
5646
5647 #define CHARBUF_SIZE 0x4000
5648
5649 #define ALLOC_CONVERSION_WORK_AREA(coding)                              \
5650   do {                                                                  \
5651     int size = CHARBUF_SIZE;;                                           \
5652                                                                         \
5653     coding->charbuf = NULL;                                             \
5654     while (size > 1024)                                                 \
5655       {                                                                 \
5656         coding->charbuf = (int *) alloca (sizeof (int) * size);         \
5657         if (coding->charbuf)                                            \
5658           break;                                                        \
5659         size >>= 1;                                                     \
5660       }                                                                 \
5661     if (! coding->charbuf)                                              \
5662       {                                                                 \
5663         coding->result = CODING_RESULT_INSUFFICIENT_MEM;                \
5664         return coding->result;                                          \
5665       }                                                                 \
5666     coding->charbuf_size = size;                                        \
5667   } while (0)
5668
5669
5670 static void
5671 produce_annotation (coding)
5672      struct coding_system *coding;
5673 {
5674   int *charbuf = coding->charbuf;
5675   int *charbuf_end = charbuf + coding->charbuf_used;
5676
5677   if (NILP (coding->dst_object))
5678     return;
5679
5680   while (charbuf < charbuf_end)
5681     {
5682       if (*charbuf >= 0)
5683         charbuf++;
5684       else
5685         {
5686           int len = -*charbuf;
5687           switch (charbuf[1])
5688             {
5689             case CODING_ANNOTATE_COMPOSITION_MASK:
5690               produce_composition (coding, charbuf);
5691               break;
5692             case CODING_ANNOTATE_CHARSET_MASK:
5693               produce_charset (coding, charbuf);
5694               break;
5695             default:
5696               abort ();
5697             }
5698           charbuf += len;
5699         }
5700     }
5701 }
5702
5703 /* Decode the data at CODING->src_object into CODING->dst_object.
5704    CODING->src_object is a buffer, a string, or nil.
5705    CODING->dst_object is a buffer.
5706
5707    If CODING->src_object is a buffer, it must be the current buffer.
5708    In this case, if CODING->src_pos is positive, it is a position of
5709    the source text in the buffer, otherwise, the source text is in the
5710    gap area of the buffer, and CODING->src_pos specifies the offset of
5711    the text from GPT (which must be the same as PT).  If this is the
5712    same buffer as CODING->dst_object, CODING->src_pos must be
5713    negative.
5714
5715    If CODING->src_object is a string, CODING->src_pos in an index to
5716    that string.
5717
5718    If CODING->src_object is nil, CODING->source must already point to
5719    the non-relocatable memory area.  In this case, CODING->src_pos is
5720    an offset from CODING->source.
5721
5722    The decoded data is inserted at the current point of the buffer
5723    CODING->dst_object.
5724 */
5725
5726 static int
5727 decode_coding (coding)
5728      struct coding_system *coding;
5729 {
5730   Lisp_Object attrs;
5731
5732   if (BUFFERP (coding->src_object)
5733       && coding->src_pos > 0
5734       && coding->src_pos < GPT
5735       && coding->src_pos + coding->src_chars > GPT)
5736     move_gap_both (coding->src_pos, coding->src_pos_byte);
5737
5738   if (BUFFERP (coding->dst_object))
5739     {
5740       if (current_buffer != XBUFFER (coding->dst_object))
5741         set_buffer_internal (XBUFFER (coding->dst_object));
5742       if (GPT != PT)
5743         move_gap_both (PT, PT_BYTE);
5744     }
5745
5746   coding->consumed = coding->consumed_char = 0;
5747   coding->produced = coding->produced_char = 0;
5748   coding->chars_at_source = 0;
5749   coding->result = CODING_RESULT_SUCCESS;
5750   coding->errors = 0;
5751
5752   ALLOC_CONVERSION_WORK_AREA (coding);
5753
5754   attrs = CODING_ID_ATTRS (coding->id);
5755
5756   do
5757     {
5758       coding_set_source (coding);
5759       coding->annotated = 0;
5760       (*(coding->decoder)) (coding);
5761       if (!NILP (CODING_ATTR_DECODE_TBL (attrs)))
5762         translate_chars (coding, CODING_ATTR_DECODE_TBL (attrs));
5763       else if (!NILP (Vstandard_translation_table_for_decode))
5764         translate_chars (coding, Vstandard_translation_table_for_decode);
5765       coding_set_destination (coding);
5766       produce_chars (coding);
5767       if (coding->annotated)
5768         produce_annotation (coding);
5769     }
5770   while (coding->consumed < coding->src_bytes
5771          && ! coding->result);
5772
5773   if (EQ (CODING_ATTR_TYPE (CODING_ID_ATTRS (coding->id)), Qccl)
5774       && SYMBOLP (CODING_ID_EOL_TYPE (coding->id))
5775       && ! EQ (CODING_ID_EOL_TYPE (coding->id), Qunix))
5776     decode_eol (coding);
5777
5778   coding->carryover_bytes = 0;
5779   if (coding->consumed < coding->src_bytes)
5780     {
5781       int nbytes = coding->src_bytes - coding->consumed;
5782       unsigned char *src;
5783
5784       coding_set_source (coding);
5785       coding_set_destination (coding);
5786       src = coding->source + coding->consumed;
5787
5788       if (coding->mode & CODING_MODE_LAST_BLOCK)
5789         {
5790           /* Flush out unprocessed data as binary chars.  We are sure
5791              that the number of data is less than the size of
5792              coding->charbuf.  */
5793           while (nbytes-- > 0)
5794             {
5795               int c = *src++;
5796
5797               coding->charbuf[coding->charbuf_used++] = (c & 0x80 ? - c : c);
5798             }
5799           produce_chars (coding);
5800         }
5801       else
5802         {
5803           /* Record unprocessed bytes in coding->carryover.  We are
5804              sure that the number of data is less than the size of
5805              coding->carryover.  */
5806           unsigned char *p = coding->carryover;
5807
5808           coding->carryover_bytes = nbytes;
5809           while (nbytes-- > 0)
5810             *p++ = *src++;
5811         }
5812       coding->consumed = coding->src_bytes;
5813     }
5814
5815   return coding->result;
5816 }
5817
5818
5819 /* Extract an annotation datum from a composition starting at POS and
5820    ending before LIMIT of CODING->src_object (buffer or string), store
5821    the data in BUF, set *STOP to a starting position of the next
5822    composition (if any) or to LIMIT, and return the address of the
5823    next element of BUF.
5824
5825    If such an annotation is not found, set *STOP to a starting
5826    position of a composition after POS (if any) or to LIMIT, and
5827    return BUF.  */
5828
5829 static INLINE int *
5830 handle_composition_annotation (pos, limit, coding, buf, stop)
5831      EMACS_INT pos, limit;
5832      struct coding_system *coding;
5833      int *buf;
5834      EMACS_INT *stop;
5835 {
5836   EMACS_INT start, end;
5837   Lisp_Object prop;
5838
5839   if (! find_composition (pos, limit, &start, &end, &prop, coding->src_object)
5840       || end > limit)
5841     *stop = limit;
5842   else if (start > pos)
5843     *stop = start;
5844   else
5845     {
5846       if (start == pos)
5847         {
5848           /* We found a composition.  Store the corresponding
5849              annotation data in BUF.  */
5850           int *head = buf;
5851           enum composition_method method = COMPOSITION_METHOD (prop);
5852           int nchars = COMPOSITION_LENGTH (prop);
5853
5854           ADD_COMPOSITION_DATA (buf, 0, nchars, method);
5855           if (method != COMPOSITION_RELATIVE)
5856             {
5857               Lisp_Object components;
5858               int len, i, i_byte;
5859
5860               components = COMPOSITION_COMPONENTS (prop);
5861               if (VECTORP (components))
5862                 {
5863                   len = XVECTOR (components)->size;
5864                   for (i = 0; i < len; i++)
5865                     *buf++ = XINT (AREF (components, i));
5866                 }
5867               else if (STRINGP (components))
5868                 {
5869                   len = XSTRING (components)->size;
5870                   i = i_byte = 0;
5871                   while (i < len)
5872                     {
5873                       FETCH_STRING_CHAR_ADVANCE (*buf, components, i, i_byte);
5874                       buf++;
5875                     }
5876                 }
5877               else if (INTEGERP (components))
5878                 {
5879                   len = 1;
5880                   *buf++ = XINT (components);
5881                 }
5882               else if (CONSP (components))
5883                 {
5884                   for (len = 0; CONSP (components);
5885                        len++, components = XCDR (components))
5886                     *buf++ = XINT (XCAR (components));
5887                 }
5888               else
5889                 abort ();
5890               *head -= len;
5891             }
5892         }
5893
5894       if (find_composition (end, limit, &start, &end, &prop,
5895                             coding->src_object)
5896           && end <= limit)
5897         *stop = start;
5898       else
5899         *stop = limit;
5900     }
5901   return buf;
5902 }
5903
5904
5905 /* Extract an annotation datum from a text property `charset' at POS of
5906    CODING->src_object (buffer of string), store the data in BUF, set
5907    *STOP to the position where the value of `charset' property changes
5908    (limiting by LIMIT), and return the address of the next element of
5909    BUF.
5910
5911    If the property value is nil, set *STOP to the position where the
5912    property value is non-nil (limiting by LIMIT), and return BUF.  */
5913
5914 static INLINE int *
5915 handle_charset_annotation (pos, limit, coding, buf, stop)
5916      EMACS_INT pos, limit;
5917      struct coding_system *coding;
5918      int *buf;
5919      EMACS_INT *stop;
5920 {
5921   Lisp_Object val, next;
5922   int id;
5923
5924   val = Fget_text_property (make_number (pos), Qcharset, coding->src_object);
5925   if (! NILP (val) && CHARSETP (val))
5926     id = XINT (CHARSET_SYMBOL_ID (val));
5927   else
5928     id = -1;
5929   ADD_CHARSET_DATA (buf, 0, 0, id);
5930   next = Fnext_single_property_change (make_number (pos), Qcharset,
5931                                        coding->src_object,
5932                                        make_number (limit));
5933   *stop = XINT (next);
5934   return buf;
5935 }
5936
5937
5938 static void
5939 consume_chars (coding)
5940      struct coding_system *coding;
5941 {
5942   int *buf = coding->charbuf;
5943   int *buf_end = coding->charbuf + coding->charbuf_size;
5944   const unsigned char *src = coding->source + coding->consumed;
5945   EMACS_INT pos = coding->src_pos + coding->consumed_char;
5946   EMACS_INT end_pos = coding->src_pos + coding->src_chars;
5947   int multibytep = coding->src_multibyte;
5948   Lisp_Object eol_type;
5949   int c;
5950   EMACS_INT stop, stop_composition, stop_charset;
5951   int id;
5952
5953   eol_type = CODING_ID_EOL_TYPE (coding->id);
5954   if (VECTORP (eol_type))
5955     eol_type = Qunix;
5956
5957   /* Note: composition handling is not yet implemented.  */
5958   coding->common_flags &= ~CODING_ANNOTATE_COMPOSITION_MASK;
5959
5960   if (coding->common_flags & CODING_ANNOTATE_COMPOSITION_MASK)
5961     stop = stop_composition = pos;
5962   else
5963     stop = stop_composition = end_pos;
5964   if (coding->common_flags & CODING_ANNOTATE_CHARSET_MASK)
5965     stop = stop_charset = pos;
5966   else
5967     stop_charset = end_pos;
5968
5969   /* Compensate for CRLF and annotation.  */
5970   buf_end -= 1 + MAX_ANNOTATION_LENGTH;
5971   while (buf < buf_end)
5972     {
5973       if (pos == stop)
5974         {
5975           int *p;
5976
5977           if (pos == end_pos)
5978             break;
5979           if (pos == stop_composition)
5980             buf = handle_composition_annotation (pos, end_pos, coding,
5981                                                  buf, &stop_composition);
5982           if (pos == stop_charset)
5983             buf = handle_charset_annotation (pos, end_pos, coding,
5984                                              buf, &stop_charset);
5985           stop = (stop_composition < stop_charset
5986                   ? stop_composition : stop_charset);
5987         }
5988
5989       if (! multibytep)
5990         c = *src++;
5991       else
5992         c = STRING_CHAR_ADVANCE (src);
5993       if ((c == '\r') && (coding->mode & CODING_MODE_SELECTIVE_DISPLAY))
5994         c = '\n';
5995       if (! EQ (eol_type, Qunix))
5996         {
5997           if (c == '\n')
5998             {
5999               if (EQ (eol_type, Qdos))
6000                 *buf++ = '\r';
6001               else
6002                 c = '\r';
6003             }
6004         }
6005       *buf++ = c;
6006       pos++;
6007     }
6008
6009   coding->consumed = src - coding->source;
6010   coding->consumed_char = pos - coding->src_pos;
6011   coding->charbuf_used = buf - coding->charbuf;
6012   coding->chars_at_source = 0;
6013 }
6014
6015
6016 /* Encode the text at CODING->src_object into CODING->dst_object.
6017    CODING->src_object is a buffer or a string.
6018    CODING->dst_object is a buffer or nil.
6019
6020    If CODING->src_object is a buffer, it must be the current buffer.
6021    In this case, if CODING->src_pos is positive, it is a position of
6022    the source text in the buffer, otherwise. the source text is in the
6023    gap area of the buffer, and coding->src_pos specifies the offset of
6024    the text from GPT (which must be the same as PT).  If this is the
6025    same buffer as CODING->dst_object, CODING->src_pos must be
6026    negative and CODING should not have `pre-write-conversion'.
6027
6028    If CODING->src_object is a string, CODING should not have
6029    `pre-write-conversion'.
6030
6031    If CODING->dst_object is a buffer, the encoded data is inserted at
6032    the current point of that buffer.
6033
6034    If CODING->dst_object is nil, the encoded data is placed at the
6035    memory area specified by CODING->destination.  */
6036
6037 static int
6038 encode_coding (coding)
6039      struct coding_system *coding;
6040 {
6041   Lisp_Object attrs;
6042
6043   attrs = CODING_ID_ATTRS (coding->id);
6044
6045   if (BUFFERP (coding->dst_object))
6046     {
6047       set_buffer_internal (XBUFFER (coding->dst_object));
6048       coding->dst_multibyte
6049         = ! NILP (current_buffer->enable_multibyte_characters);
6050     }
6051
6052   coding->consumed = coding->consumed_char = 0;
6053   coding->produced = coding->produced_char = 0;
6054   coding->result = CODING_RESULT_SUCCESS;
6055   coding->errors = 0;
6056
6057   ALLOC_CONVERSION_WORK_AREA (coding);
6058
6059   do {
6060     coding_set_source (coding);
6061     consume_chars (coding);
6062
6063     if (!NILP (CODING_ATTR_ENCODE_TBL (attrs)))
6064       translate_chars (coding, CODING_ATTR_ENCODE_TBL (attrs));
6065     else if (!NILP (Vstandard_translation_table_for_encode))
6066       translate_chars (coding, Vstandard_translation_table_for_encode);
6067
6068     coding_set_destination (coding);
6069     (*(coding->encoder)) (coding);
6070   } while (coding->consumed_char < coding->src_chars);
6071
6072   if (BUFFERP (coding->dst_object))
6073     insert_from_gap (coding->produced_char, coding->produced);
6074
6075   return (coding->result);
6076 }
6077
6078 /* Work buffer */
6079
6080 /* List of currently used working buffer.  */
6081 Lisp_Object Vcode_conversion_work_buf_list;
6082
6083 /* A working buffer used by the top level conversion.  */
6084 Lisp_Object Vcode_conversion_reused_work_buf;
6085
6086
6087 /* Return a working buffer that can be freely used by the following
6088    code conversion.  MULTIBYTEP specifies the multibyteness of the
6089    buffer.  */
6090
6091 Lisp_Object
6092 make_conversion_work_buffer (multibytep)
6093      int multibytep;
6094 {
6095   struct buffer *current = current_buffer;
6096   Lisp_Object buf;
6097
6098   if (NILP (Vcode_conversion_work_buf_list))
6099     {
6100       if (NILP (Vcode_conversion_reused_work_buf))
6101         Vcode_conversion_reused_work_buf
6102           = Fget_buffer_create (build_string (" *code-conversion-work*"));
6103       Vcode_conversion_work_buf_list
6104         = Fcons (Vcode_conversion_reused_work_buf, Qnil);
6105     }
6106   else
6107     {
6108       int depth = XINT (Flength (Vcode_conversion_work_buf_list));
6109       char str[128];
6110
6111       sprintf (str, " *code-conversion-work*<%d>", depth);
6112       Vcode_conversion_work_buf_list
6113         = Fcons (Fget_buffer_create (build_string (str)),
6114                  Vcode_conversion_work_buf_list);
6115     }
6116
6117   buf = XCAR (Vcode_conversion_work_buf_list);
6118   set_buffer_internal (XBUFFER (buf));
6119   current_buffer->undo_list = Qt;
6120   Ferase_buffer ();
6121   Fset_buffer_multibyte (multibytep ? Qt : Qnil, Qnil);
6122   set_buffer_internal (current);
6123   return buf;
6124 }
6125
6126 static struct coding_system *saved_coding;
6127
6128 Lisp_Object
6129 code_conversion_restore (info)
6130      Lisp_Object info;
6131 {
6132   int depth = XINT (Flength (Vcode_conversion_work_buf_list));
6133   Lisp_Object buf;
6134
6135   if (depth > 0)
6136     {
6137       buf = XCAR (Vcode_conversion_work_buf_list);
6138       Vcode_conversion_work_buf_list = XCDR (Vcode_conversion_work_buf_list);
6139       if (depth > 1 && !NILP (Fbuffer_live_p (buf)))
6140         Fkill_buffer (buf);
6141     }
6142
6143   if (EQ (saved_coding->dst_object, Qt)
6144       && saved_coding->destination)
6145     xfree (saved_coding->destination);
6146
6147   return save_excursion_restore (info);
6148 }
6149
6150
6151 int
6152 decode_coding_gap (coding, chars, bytes)
6153      struct coding_system *coding;
6154      EMACS_INT chars, bytes;
6155 {
6156   int count = specpdl_ptr - specpdl;
6157
6158   saved_coding = coding;
6159   record_unwind_protect (code_conversion_restore, save_excursion_save ());
6160
6161   coding->src_object = Fcurrent_buffer ();
6162   coding->src_chars = chars;
6163   coding->src_bytes = bytes;
6164   coding->src_pos = -chars;
6165   coding->src_pos_byte = -bytes;
6166   coding->src_multibyte = chars < bytes;
6167   coding->dst_object = coding->src_object;
6168   coding->dst_pos = PT;
6169   coding->dst_pos_byte = PT_BYTE;
6170   coding->dst_multibyte = ! NILP (current_buffer->enable_multibyte_characters);
6171   coding->mode |= CODING_MODE_LAST_BLOCK;
6172
6173   if (CODING_REQUIRE_DETECTION (coding))
6174     detect_coding (coding);
6175
6176   decode_coding (coding);
6177
6178   unbind_to (count, Qnil);
6179   return coding->result;
6180 }
6181
6182 int
6183 encode_coding_gap (coding, chars, bytes)
6184      struct coding_system *coding;
6185      EMACS_INT chars, bytes;
6186 {
6187   int count = specpdl_ptr - specpdl;
6188   Lisp_Object buffer;
6189
6190   saved_coding = coding;
6191   record_unwind_protect (code_conversion_restore, save_excursion_save ());
6192
6193   buffer = Fcurrent_buffer ();
6194   coding->src_object = buffer;
6195   coding->src_chars = chars;
6196   coding->src_bytes = bytes;
6197   coding->src_pos = -chars;
6198   coding->src_pos_byte = -bytes;
6199   coding->src_multibyte = chars < bytes;
6200   coding->dst_object = coding->src_object;
6201   coding->dst_pos = PT;
6202   coding->dst_pos_byte = PT_BYTE;
6203
6204   encode_coding (coding);
6205
6206   unbind_to (count, Qnil);
6207   return coding->result;
6208 }
6209
6210
6211 /* Decode the text in the range FROM/FROM_BYTE and TO/TO_BYTE in
6212    SRC_OBJECT into DST_OBJECT by coding context CODING.
6213
6214    SRC_OBJECT is a buffer, a string, or Qnil.
6215
6216    If it is a buffer, the text is at point of the buffer.  FROM and TO
6217    are positions in the buffer.
6218
6219    If it is a string, the text is at the beginning of the string.
6220    FROM and TO are indices to the string.
6221
6222    If it is nil, the text is at coding->source.  FROM and TO are
6223    indices to coding->source.
6224
6225    DST_OBJECT is a buffer, Qt, or Qnil.
6226
6227    If it is a buffer, the decoded text is inserted at point of the
6228    buffer.  If the buffer is the same as SRC_OBJECT, the source text
6229    is deleted.
6230
6231    If it is Qt, a string is made from the decoded text, and
6232    set in CODING->dst_object.
6233
6234    If it is Qnil, the decoded text is stored at CODING->destination.
6235    The called must allocate CODING->dst_bytes bytes at
6236    CODING->destination by xmalloc.  If the decoded text is longer than
6237    CODING->dst_bytes, CODING->destination is relocated by xrealloc.
6238  */
6239
6240 void
6241 decode_coding_object (coding, src_object, from, from_byte, to, to_byte,
6242                       dst_object)
6243      struct coding_system *coding;
6244      Lisp_Object src_object;
6245      EMACS_INT from, from_byte, to, to_byte;
6246      Lisp_Object dst_object;
6247 {
6248   int count = specpdl_ptr - specpdl;
6249   unsigned char *destination;
6250   EMACS_INT dst_bytes;
6251   EMACS_INT chars = to - from;
6252   EMACS_INT bytes = to_byte - from_byte;
6253   Lisp_Object attrs;
6254
6255   saved_coding = coding;
6256   record_unwind_protect (code_conversion_restore, save_excursion_save ());
6257
6258   if (NILP (dst_object))
6259     {
6260       destination = coding->destination;
6261       dst_bytes = coding->dst_bytes;
6262     }
6263
6264   coding->src_object = src_object;
6265   coding->src_chars = chars;
6266   coding->src_bytes = bytes;
6267   coding->src_multibyte = chars < bytes;
6268
6269   if (STRINGP (src_object))
6270     {
6271       coding->src_pos = from;
6272       coding->src_pos_byte = from_byte;
6273     }
6274   else if (BUFFERP (src_object))
6275     {
6276       set_buffer_internal (XBUFFER (src_object));
6277       if (from != GPT)
6278         move_gap_both (from, from_byte);
6279       if (EQ (src_object, dst_object))
6280         {
6281           TEMP_SET_PT_BOTH (from, from_byte);
6282           del_range_both (from, from_byte, to, to_byte, 1);
6283           coding->src_pos = -chars;
6284           coding->src_pos_byte = -bytes;
6285         }
6286       else
6287         {
6288           coding->src_pos = from;
6289           coding->src_pos_byte = from_byte;
6290         }
6291     }
6292
6293   if (CODING_REQUIRE_DETECTION (coding))
6294     detect_coding (coding);
6295   attrs = CODING_ID_ATTRS (coding->id);
6296
6297   if (! NILP (CODING_ATTR_POST_READ (attrs))
6298       || EQ (dst_object, Qt))
6299     {
6300       coding->dst_object = make_conversion_work_buffer (1);
6301       coding->dst_pos = BEG;
6302       coding->dst_pos_byte = BEG_BYTE;
6303       coding->dst_multibyte = 1;
6304     }
6305   else if (BUFFERP (dst_object))
6306     {
6307       coding->dst_object = dst_object;
6308       coding->dst_pos = BUF_PT (XBUFFER (dst_object));
6309       coding->dst_pos_byte = BUF_PT_BYTE (XBUFFER (dst_object));
6310       coding->dst_multibyte
6311         = ! NILP (XBUFFER (dst_object)->enable_multibyte_characters);
6312     }
6313   else
6314     {
6315       coding->dst_object = Qnil;
6316       coding->dst_multibyte = 1;
6317     }
6318
6319   decode_coding (coding);
6320
6321   if (BUFFERP (coding->dst_object))
6322     set_buffer_internal (XBUFFER (coding->dst_object));
6323
6324   if (! NILP (CODING_ATTR_POST_READ (attrs)))
6325     {
6326       struct gcpro gcpro1, gcpro2;
6327       EMACS_INT prev_Z = Z, prev_Z_BYTE = Z_BYTE;
6328       Lisp_Object val;
6329
6330       TEMP_SET_PT_BOTH (coding->dst_pos, coding->dst_pos_byte);
6331       GCPRO2 (coding->src_object, coding->dst_object);
6332       val = call1 (CODING_ATTR_POST_READ (attrs),
6333                    make_number (coding->produced_char));
6334       UNGCPRO;
6335       CHECK_NATNUM (val);
6336       coding->produced_char += Z - prev_Z;
6337       coding->produced += Z_BYTE - prev_Z_BYTE;
6338     }
6339
6340   if (EQ (dst_object, Qt))
6341     {
6342       coding->dst_object = Fbuffer_string ();
6343     }
6344   else if (NILP (dst_object) && BUFFERP (coding->dst_object))
6345     {
6346       set_buffer_internal (XBUFFER (coding->dst_object));
6347       if (dst_bytes < coding->produced)
6348         {
6349           destination
6350             = (unsigned char *) xrealloc (destination, coding->produced);
6351           if (! destination)
6352             {
6353               coding->result = CODING_RESULT_INSUFFICIENT_DST;
6354               unbind_to (count, Qnil);
6355               return;
6356             }
6357           if (BEGV < GPT && GPT < BEGV + coding->produced_char)
6358             move_gap_both (BEGV, BEGV_BYTE);
6359           bcopy (BEGV_ADDR, destination, coding->produced);
6360           coding->destination = destination;
6361         }
6362     }
6363
6364   unbind_to (count, Qnil);
6365 }
6366
6367
6368 void
6369 encode_coding_object (coding, src_object, from, from_byte, to, to_byte,
6370                       dst_object)
6371      struct coding_system *coding;
6372      Lisp_Object src_object;
6373      EMACS_INT from, from_byte, to, to_byte;
6374      Lisp_Object dst_object;
6375 {
6376   int count = specpdl_ptr - specpdl;
6377   EMACS_INT chars = to - from;
6378   EMACS_INT bytes = to_byte - from_byte;
6379   Lisp_Object attrs;
6380
6381   saved_coding = coding;
6382   record_unwind_protect (code_conversion_restore, save_excursion_save ());
6383
6384   coding->src_object = src_object;
6385   coding->src_chars = chars;
6386   coding->src_bytes = bytes;
6387   coding->src_multibyte = chars < bytes;
6388
6389   attrs = CODING_ID_ATTRS (coding->id);
6390
6391   if (! NILP (CODING_ATTR_PRE_WRITE (attrs)))
6392     {
6393       coding->src_object = make_conversion_work_buffer (coding->src_multibyte);
6394       set_buffer_internal (XBUFFER (coding->src_object));
6395       if (STRINGP (src_object))
6396         insert_from_string (src_object, from, from_byte, chars, bytes, 0);
6397       else if (BUFFERP (src_object))
6398         insert_from_buffer (XBUFFER (src_object), from, chars, 0);
6399       else
6400         insert_1_both (coding->source + from, chars, bytes, 0, 0, 0);
6401
6402       if (EQ (src_object, dst_object))
6403         {
6404           set_buffer_internal (XBUFFER (src_object));
6405           del_range_both (from, from_byte, to, to_byte, 1);
6406           set_buffer_internal (XBUFFER (coding->src_object));
6407         }
6408
6409       call2 (CODING_ATTR_PRE_WRITE (attrs),
6410              make_number (BEG), make_number (Z));
6411       coding->src_object = Fcurrent_buffer ();
6412       if (BEG != GPT)
6413         move_gap_both (BEG, BEG_BYTE);
6414       coding->src_chars = Z - BEG;
6415       coding->src_bytes = Z_BYTE - BEG_BYTE;
6416       coding->src_pos = BEG;
6417       coding->src_pos_byte = BEG_BYTE;
6418       coding->src_multibyte = Z < Z_BYTE;
6419     }
6420   else if (STRINGP (src_object))
6421     {
6422       coding->src_pos = from;
6423       coding->src_pos_byte = from_byte;
6424     }
6425   else if (BUFFERP (src_object))
6426     {
6427       set_buffer_internal (XBUFFER (src_object));
6428       if (EQ (src_object, dst_object))
6429         {
6430           coding->src_object = del_range_1 (from, to, 1, 1);
6431           coding->src_pos = 0;
6432           coding->src_pos_byte = 0;
6433         }
6434       else
6435         {
6436           if (from < GPT && to >= GPT)
6437             move_gap_both (from, from_byte);
6438           coding->src_pos = from;
6439           coding->src_pos_byte = from_byte;
6440         }
6441     }
6442
6443   if (BUFFERP (dst_object))
6444     {
6445       coding->dst_object = dst_object;
6446       if (EQ (src_object, dst_object))
6447         {
6448           coding->dst_pos = from;
6449           coding->dst_pos_byte = from_byte;
6450         }
6451       else
6452         {
6453           coding->dst_pos = BUF_PT (XBUFFER (dst_object));
6454           coding->dst_pos_byte = BUF_PT_BYTE (XBUFFER (dst_object));
6455         }
6456       coding->dst_multibyte
6457         = ! NILP (XBUFFER (dst_object)->enable_multibyte_characters);
6458     }
6459   else if (EQ (dst_object, Qt))
6460     {
6461       coding->dst_object = Qnil;
6462       coding->dst_bytes = coding->src_chars;
6463       if (coding->dst_bytes == 0)
6464         coding->dst_bytes = 1;
6465       coding->destination = (unsigned char *) xmalloc (coding->dst_bytes);
6466       coding->dst_multibyte = 0;
6467     }
6468   else
6469     {
6470       coding->dst_object = Qnil;
6471       coding->dst_multibyte = 0;
6472     }
6473
6474   encode_coding (coding);
6475
6476   if (EQ (dst_object, Qt))
6477     {
6478       if (BUFFERP (coding->dst_object))
6479         coding->dst_object = Fbuffer_string ();
6480       else
6481         {
6482           coding->dst_object
6483             = make_unibyte_string ((char *) coding->destination,
6484                                    coding->produced);
6485           xfree (coding->destination);
6486         }
6487     }
6488
6489   unbind_to (count, Qnil);
6490 }
6491
6492
6493 Lisp_Object
6494 preferred_coding_system ()
6495 {
6496   int id = coding_categories[coding_priorities[0]].id;
6497
6498   return CODING_ID_NAME (id);
6499 }
6500
6501 \f
6502 #ifdef emacs
6503 /*** 8. Emacs Lisp library functions ***/
6504
6505 DEFUN ("coding-system-p", Fcoding_system_p, Scoding_system_p, 1, 1, 0,
6506        doc: /* Return t if OBJECT is nil or a coding-system.
6507 See the documentation of `define-coding-system' for information
6508 about coding-system objects.  */)
6509      (obj)
6510      Lisp_Object obj;
6511 {
6512   return ((NILP (obj) || CODING_SYSTEM_P (obj)) ? Qt : Qnil);
6513 }
6514
6515 DEFUN ("read-non-nil-coding-system", Fread_non_nil_coding_system,
6516        Sread_non_nil_coding_system, 1, 1, 0,
6517        doc: /* Read a coding system from the minibuffer, prompting with string PROMPT.  */)
6518      (prompt)
6519      Lisp_Object prompt;
6520 {
6521   Lisp_Object val;
6522   do
6523     {
6524       val = Fcompleting_read (prompt, Vcoding_system_alist, Qnil,
6525                               Qt, Qnil, Qcoding_system_history, Qnil, Qnil);
6526     }
6527   while (XSTRING (val)->size == 0);
6528   return (Fintern (val, Qnil));
6529 }
6530
6531 DEFUN ("read-coding-system", Fread_coding_system, Sread_coding_system, 1, 2, 0,
6532        doc: /* Read a coding system from the minibuffer, prompting with string PROMPT.
6533 If the user enters null input, return second argument DEFAULT-CODING-SYSTEM.  */)
6534      (prompt, default_coding_system)
6535      Lisp_Object prompt, default_coding_system;
6536 {
6537   Lisp_Object val;
6538   if (SYMBOLP (default_coding_system))
6539     XSETSTRING (default_coding_system, XSYMBOL (default_coding_system)->name);
6540   val = Fcompleting_read (prompt, Vcoding_system_alist, Qnil,
6541                           Qt, Qnil, Qcoding_system_history,
6542                           default_coding_system, Qnil);
6543   return (XSTRING (val)->size == 0 ? Qnil : Fintern (val, Qnil));
6544 }
6545
6546 DEFUN ("check-coding-system", Fcheck_coding_system, Scheck_coding_system,
6547        1, 1, 0,
6548        doc: /* Check validity of CODING-SYSTEM.
6549 If valid, return CODING-SYSTEM, else signal a `coding-system-error' error.  */)
6550   (coding_system)
6551      Lisp_Object coding_system;
6552 {
6553   CHECK_SYMBOL (coding_system);
6554   if (!NILP (Fcoding_system_p (coding_system)))
6555     return coding_system;
6556   while (1)
6557     Fsignal (Qcoding_system_error, Fcons (coding_system, Qnil));
6558 }
6559
6560 \f
6561 /* Detect how the bytes at SRC of length SRC_BYTES are encoded.  If
6562    HIGHEST is nonzero, return the coding system of the highest
6563    priority among the detected coding systems.  Otherwize return a
6564    list of detected coding systems sorted by their priorities.  If
6565    MULTIBYTEP is nonzero, it is assumed that the bytes are in correct
6566    multibyte form but contains only ASCII and eight-bit chars.
6567    Otherwise, the bytes are raw bytes.
6568
6569    CODING-SYSTEM controls the detection as below:
6570
6571    If it is nil, detect both text-format and eol-format.  If the
6572    text-format part of CODING-SYSTEM is already specified
6573    (e.g. `iso-latin-1'), detect only eol-format.  If the eol-format
6574    part of CODING-SYSTEM is already specified (e.g. `undecided-unix'),
6575    detect only text-format.  */
6576
6577 Lisp_Object
6578 detect_coding_system (src, src_bytes, highest, multibytep, coding_system)
6579      unsigned char *src;
6580      int src_bytes, highest;
6581      int multibytep;
6582      Lisp_Object coding_system;
6583 {
6584   unsigned char *src_end = src + src_bytes;
6585   int mask = CATEGORY_MASK_ANY;
6586   Lisp_Object attrs, eol_type;
6587   Lisp_Object val;
6588   struct coding_system coding;
6589   int id;
6590   struct coding_detection_info detect_info;
6591
6592   if (NILP (coding_system))
6593     coding_system = Qundecided;
6594   setup_coding_system (coding_system, &coding);
6595   attrs = CODING_ID_ATTRS (coding.id);
6596   eol_type = CODING_ID_EOL_TYPE (coding.id);
6597   coding_system = CODING_ATTR_BASE_NAME (attrs);
6598
6599   coding.source = src;
6600   coding.src_bytes = src_bytes;
6601   coding.src_multibyte = multibytep;
6602   coding.consumed = 0;
6603   coding.mode |= CODING_MODE_LAST_BLOCK;
6604
6605   detect_info.checked = detect_info.found = detect_info.rejected = 0;
6606
6607   /* At first, detect text-format if necessary.  */
6608   if (XINT (CODING_ATTR_CATEGORY (attrs)) == coding_category_undecided)
6609     {
6610       enum coding_category category;
6611       struct coding_system *this;
6612       int c, i;
6613
6614       for (; src < src_end; src++)
6615         {
6616           c = *src;
6617           if (c & 0x80
6618               || (c < 0x20 && (c == ISO_CODE_ESC
6619                                || c == ISO_CODE_SI
6620                                || c == ISO_CODE_SO)))
6621             break;
6622         }
6623       coding.head_ascii = src - coding.source;
6624
6625       if (src < src_end)
6626         for (i = 0; i < coding_category_raw_text; i++)
6627           {
6628             category = coding_priorities[i];
6629             this = coding_categories + category;
6630
6631             if (this->id < 0)
6632               {
6633                 /* No coding system of this category is defined.  */
6634                 detect_info.rejected |= (1 << category);
6635               }
6636             else if (category >= coding_category_raw_text)
6637               continue;
6638             else if (detect_info.checked & (1 << category))
6639               {
6640                 if (highest
6641                     && (detect_info.found & (1 << category)))
6642                   break;
6643               }
6644             else
6645               {
6646                 if ((*(this->detector)) (&coding, &detect_info)
6647                     && highest
6648                     && (detect_info.found & (1 << category)))
6649                   break;
6650               }
6651           }
6652
6653
6654       if (detect_info.rejected == CATEGORY_MASK_ANY)
6655         {
6656           detect_info.found = CATEGORY_MASK_RAW_TEXT;
6657           id = coding_categories[coding_category_raw_text].id;
6658           val = Fcons (make_number (id), Qnil);
6659         }
6660       else if (! detect_info.rejected && ! detect_info.found)
6661         {
6662           detect_info.found = CATEGORY_MASK_ANY;
6663           id = coding_categories[coding_category_undecided].id;
6664           val = Fcons (make_number (id), Qnil);
6665         }
6666       else if (highest)
6667         {
6668           if (detect_info.found)
6669             {
6670               detect_info.found = 1 << category;
6671               val = Fcons (make_number (this->id), Qnil);
6672             }
6673           else
6674             for (i = 0; i < coding_category_raw_text; i++)
6675               if (! (detect_info.rejected & (1 << coding_priorities[i])))
6676                 {
6677                   detect_info.found = 1 << coding_priorities[i];
6678                   id = coding_categories[coding_priorities[i]].id;
6679                   val = Fcons (make_number (id), Qnil);
6680                   break;
6681                 }
6682         }
6683       else
6684         {
6685           int mask = detect_info.rejected | detect_info.found;
6686           int found = 0;
6687           val = Qnil;
6688
6689           for (i = coding_category_raw_text - 1; i >= 0; i--)
6690             {
6691               category = coding_priorities[i];
6692               if (! (mask & (1 << category)))
6693                 {
6694                   found |= 1 << category;
6695                   id = coding_categories[category].id;
6696                   val = Fcons (make_number (id), val);
6697                 }
6698             }
6699           for (i = coding_category_raw_text - 1; i >= 0; i--)
6700             {
6701               category = coding_priorities[i];
6702               if (detect_info.found & (1 << category))
6703                 {
6704                   id = coding_categories[category].id;
6705                   val = Fcons (make_number (id), val);
6706                 }
6707             }
6708           detect_info.found |= found;
6709         }
6710     }
6711   else
6712     {
6713       detect_info.found = 1 << XINT (CODING_ATTR_CATEGORY (attrs));
6714       val = Fcons (make_number (coding.id), Qnil);
6715     }
6716
6717   /* Then, detect eol-format if necessary.  */
6718   {
6719     int normal_eol = -1, utf_16_be_eol = -1, utf_16_le_eol;
6720     Lisp_Object tail;
6721
6722     if (VECTORP (eol_type))
6723       {
6724         if (detect_info.found & ~CATEGORY_MASK_UTF_16)
6725           normal_eol = detect_eol (coding.source, src_bytes,
6726                                    coding_category_raw_text);
6727         if (detect_info.found & (CATEGORY_MASK_UTF_16_BE
6728                                  | CATEGORY_MASK_UTF_16_BE_NOSIG))
6729           utf_16_be_eol = detect_eol (coding.source, src_bytes,
6730                                       coding_category_utf_16_be);
6731         if (detect_info.found & (CATEGORY_MASK_UTF_16_LE
6732                                  | CATEGORY_MASK_UTF_16_LE_NOSIG))
6733           utf_16_le_eol = detect_eol (coding.source, src_bytes,
6734                                       coding_category_utf_16_le);
6735       }
6736     else
6737       {
6738         if (EQ (eol_type, Qunix))
6739           normal_eol = utf_16_be_eol = utf_16_le_eol = EOL_SEEN_LF;
6740         else if (EQ (eol_type, Qdos))
6741           normal_eol = utf_16_be_eol = utf_16_le_eol = EOL_SEEN_CRLF;
6742         else
6743           normal_eol = utf_16_be_eol = utf_16_le_eol = EOL_SEEN_CR;
6744       }
6745
6746     for (tail = val; CONSP (tail); tail = XCDR (tail))
6747       {
6748         enum coding_category category;
6749         int this_eol;
6750
6751         id = XINT (XCAR (tail));
6752         attrs = CODING_ID_ATTRS (id);
6753         category = XINT (CODING_ATTR_CATEGORY (attrs));
6754         eol_type = CODING_ID_EOL_TYPE (id);
6755         if (VECTORP (eol_type))
6756           {
6757             if (category == coding_category_utf_16_be
6758                 || category == coding_category_utf_16_be_nosig)
6759               this_eol = utf_16_be_eol;
6760             else if (category == coding_category_utf_16_le
6761                      || category == coding_category_utf_16_le_nosig)
6762               this_eol = utf_16_le_eol;
6763             else
6764               this_eol = normal_eol;
6765
6766             if (this_eol == EOL_SEEN_LF)
6767               XSETCAR (tail, AREF (eol_type, 0));
6768             else if (this_eol == EOL_SEEN_CRLF)
6769               XSETCAR (tail, AREF (eol_type, 1));
6770             else if (this_eol == EOL_SEEN_CR)
6771               XSETCAR (tail, AREF (eol_type, 2));
6772             else
6773               XSETCAR (tail, CODING_ID_NAME (id));
6774           }
6775         else
6776           XSETCAR (tail, CODING_ID_NAME (id));
6777       }
6778   }
6779
6780   return (highest ? XCAR (val) : val);
6781 }
6782
6783
6784 DEFUN ("detect-coding-region", Fdetect_coding_region, Sdetect_coding_region,
6785        2, 3, 0,
6786        doc: /* Detect coding system of the text in the region between START and END.
6787 Return a list of possible coding systems ordered by priority.
6788
6789 If only ASCII characters are found, it returns a list of single element
6790 `undecided' or its subsidiary coding system according to a detected
6791 end-of-line format.
6792
6793 If optional argument HIGHEST is non-nil, return the coding system of
6794 highest priority.  */)
6795      (start, end, highest)
6796      Lisp_Object start, end, highest;
6797 {
6798   int from, to;
6799   int from_byte, to_byte;
6800
6801   CHECK_NUMBER_COERCE_MARKER (start);
6802   CHECK_NUMBER_COERCE_MARKER (end);
6803
6804   validate_region (&start, &end);
6805   from = XINT (start), to = XINT (end);
6806   from_byte = CHAR_TO_BYTE (from);
6807   to_byte = CHAR_TO_BYTE (to);
6808
6809   if (from < GPT && to >= GPT)
6810     move_gap_both (to, to_byte);
6811
6812   return detect_coding_system (BYTE_POS_ADDR (from_byte),
6813                                to_byte - from_byte,
6814                                !NILP (highest),
6815                                !NILP (current_buffer
6816                                       ->enable_multibyte_characters),
6817                                Qnil);
6818 }
6819
6820 DEFUN ("detect-coding-string", Fdetect_coding_string, Sdetect_coding_string,
6821        1, 2, 0,
6822        doc: /* Detect coding system of the text in STRING.
6823 Return a list of possible coding systems ordered by priority.
6824
6825 If only ASCII characters are found, it returns a list of single element
6826 `undecided' or its subsidiary coding system according to a detected
6827 end-of-line format.
6828
6829 If optional argument HIGHEST is non-nil, return the coding system of
6830 highest priority.  */)
6831      (string, highest)
6832      Lisp_Object string, highest;
6833 {
6834   CHECK_STRING (string);
6835
6836   return detect_coding_system (XSTRING (string)->data,
6837                                STRING_BYTES (XSTRING (string)),
6838                                !NILP (highest),
6839                                STRING_MULTIBYTE (string),
6840                                Qnil);
6841 }
6842
6843
6844 static INLINE int
6845 char_encodable_p (c, attrs)
6846      int c;
6847      Lisp_Object attrs;
6848 {
6849   Lisp_Object tail;
6850   struct charset *charset;
6851
6852   for (tail = CODING_ATTR_CHARSET_LIST (attrs);
6853        CONSP (tail); tail = XCDR (tail))
6854     {
6855       charset = CHARSET_FROM_ID (XINT (XCAR (tail)));
6856       if (CHAR_CHARSET_P (c, charset))
6857         break;
6858     }
6859   return (! NILP (tail));
6860 }
6861
6862
6863 /* Return a list of coding systems that safely encode the text between
6864    START and END.  If EXCLUDE is non-nil, it is a list of coding
6865    systems not to check.  The returned list doesn't contain any such
6866    coding systems.  In any case, if the text contains only ASCII or is
6867    unibyte, return t.  */
6868
6869 DEFUN ("find-coding-systems-region-internal",
6870        Ffind_coding_systems_region_internal,
6871        Sfind_coding_systems_region_internal, 2, 3, 0,
6872        doc: /* Internal use only.  */)
6873      (start, end, exclude)
6874      Lisp_Object start, end, exclude;
6875 {
6876   Lisp_Object coding_attrs_list, safe_codings;
6877   EMACS_INT start_byte, end_byte;
6878   const unsigned char *p, *pbeg, *pend;
6879   int c;
6880   Lisp_Object tail, elt;
6881
6882   if (STRINGP (start))
6883     {
6884       if (!STRING_MULTIBYTE (start)
6885           || XSTRING (start)->size == STRING_BYTES (XSTRING (start)))
6886         return Qt;
6887       start_byte = 0;
6888       end_byte = STRING_BYTES (XSTRING (start));
6889     }
6890   else
6891     {
6892       CHECK_NUMBER_COERCE_MARKER (start);
6893       CHECK_NUMBER_COERCE_MARKER (end);
6894       if (XINT (start) < BEG || XINT (end) > Z || XINT (start) > XINT (end))
6895         args_out_of_range (start, end);
6896       if (NILP (current_buffer->enable_multibyte_characters))
6897         return Qt;
6898       start_byte = CHAR_TO_BYTE (XINT (start));
6899       end_byte = CHAR_TO_BYTE (XINT (end));
6900       if (XINT (end) - XINT (start) == end_byte - start_byte)
6901         return Qt;
6902
6903       if (XINT (start) < GPT && XINT (end) > GPT)
6904         {
6905           if ((GPT - XINT (start)) < (XINT (end) - GPT))
6906             move_gap_both (XINT (start), start_byte);
6907           else
6908             move_gap_both (XINT (end), end_byte);
6909         }
6910     }
6911
6912   coding_attrs_list = Qnil;
6913   for (tail = Vcoding_system_list; CONSP (tail); tail = XCDR (tail))
6914     if (NILP (exclude)
6915         || NILP (Fmemq (XCAR (tail), exclude)))
6916       {
6917         Lisp_Object attrs;
6918
6919         attrs = AREF (CODING_SYSTEM_SPEC (XCAR (tail)), 0);
6920         if (EQ (XCAR (tail), CODING_ATTR_BASE_NAME (attrs))
6921             && ! EQ (CODING_ATTR_TYPE (attrs), Qundecided))
6922           coding_attrs_list = Fcons (attrs, coding_attrs_list);
6923       }
6924
6925   if (STRINGP (start))
6926     p = pbeg = XSTRING (start)->data;
6927   else
6928     p = pbeg = BYTE_POS_ADDR (start_byte);
6929   pend = p + (end_byte - start_byte);
6930
6931   while (p < pend && ASCII_BYTE_P (*p)) p++;
6932   while (p < pend && ASCII_BYTE_P (*(pend - 1))) pend--;
6933
6934   while (p < pend)
6935     {
6936       if (ASCII_BYTE_P (*p))
6937         p++;
6938       else
6939         {
6940           c = STRING_CHAR_ADVANCE (p);
6941
6942           charset_map_loaded = 0;
6943           for (tail = coding_attrs_list; CONSP (tail);)
6944             {
6945               elt = XCAR (tail);
6946               if (NILP (elt))
6947                 tail = XCDR (tail);
6948               else if (char_encodable_p (c, elt))
6949                 tail = XCDR (tail);
6950               else if (CONSP (XCDR (tail)))
6951                 {
6952                   XSETCAR (tail, XCAR (XCDR (tail)));
6953                   XSETCDR (tail, XCDR (XCDR (tail)));
6954                 }
6955               else
6956                 {
6957                   XSETCAR (tail, Qnil);
6958                   tail = XCDR (tail);
6959                 }
6960             }
6961           if (charset_map_loaded)
6962             {
6963               EMACS_INT p_offset = p - pbeg, pend_offset = pend - pbeg;
6964
6965               if (STRINGP (start))
6966                 pbeg = XSTRING (start)->data;
6967               else
6968                 pbeg = BYTE_POS_ADDR (start_byte);
6969               p = pbeg + p_offset;
6970               pend = pbeg + pend_offset;
6971             }
6972         }
6973     }
6974
6975   safe_codings = Qnil;
6976   for (tail = coding_attrs_list; CONSP (tail); tail = XCDR (tail))
6977     if (! NILP (XCAR (tail)))
6978       safe_codings = Fcons (CODING_ATTR_BASE_NAME (XCAR (tail)), safe_codings);
6979
6980   return safe_codings;
6981 }
6982
6983
6984 DEFUN ("check-coding-systems-region", Fcheck_coding_systems_region,
6985        Scheck_coding_systems_region, 3, 3, 0,
6986        doc: /* Check if the region is encodable by coding systems.
6987
6988 START and END are buffer positions specifying the region.
6989 CODING-SYSTEM-LIST is a list of coding systems to check.
6990
6991 The value is an alist ((CODING-SYSTEM POS0 POS1 ...) ...), where
6992 CODING-SYSTEM is a member of CODING-SYSTEM-LIst and can't encode the
6993 whole region, POS0, POS1, ... are buffer positions where non-encodable
6994 characters are found.
6995
6996 If all coding systems in CODING-SYSTEM-LIST can encode the region, the
6997 value is nil.
6998
6999 START may be a string.  In that case, check if the string is
7000 encodable, and the value contains indices to the string instead of
7001 buffer positions.  END is ignored.  */)
7002      (start, end, coding_system_list)
7003      Lisp_Object start, end, coding_system_list;
7004 {
7005   Lisp_Object list;
7006   EMACS_INT start_byte, end_byte;
7007   int pos;
7008   const unsigned char *p, *pbeg, *pend;
7009   int c;
7010   Lisp_Object tail, elt;
7011
7012   if (STRINGP (start))
7013     {
7014       if (!STRING_MULTIBYTE (start)
7015           && XSTRING (start)->size != STRING_BYTES (XSTRING (start)))
7016         return Qnil;
7017       start_byte = 0;
7018       end_byte = STRING_BYTES (XSTRING (start));
7019       pos = 0;
7020     }
7021   else
7022     {
7023       CHECK_NUMBER_COERCE_MARKER (start);
7024       CHECK_NUMBER_COERCE_MARKER (end);
7025       if (XINT (start) < BEG || XINT (end) > Z || XINT (start) > XINT (end))
7026         args_out_of_range (start, end);
7027       if (NILP (current_buffer->enable_multibyte_characters))
7028         return Qnil;
7029       start_byte = CHAR_TO_BYTE (XINT (start));
7030       end_byte = CHAR_TO_BYTE (XINT (end));
7031       if (XINT (end) - XINT (start) == end_byte - start_byte)
7032         return Qt;
7033
7034       if (XINT (start) < GPT && XINT (end) > GPT)
7035         {
7036           if ((GPT - XINT (start)) < (XINT (end) - GPT))
7037             move_gap_both (XINT (start), start_byte);
7038           else
7039             move_gap_both (XINT (end), end_byte);
7040         }
7041       pos = XINT (start);
7042     }
7043
7044   list = Qnil;
7045   for (tail = coding_system_list; CONSP (tail); tail = XCDR (tail))
7046     {
7047       elt = XCAR (tail);
7048       list = Fcons (Fcons (elt, Fcons (AREF (CODING_SYSTEM_SPEC (elt), 0),
7049                                        Qnil)),
7050                     list);
7051     }
7052
7053   if (STRINGP (start))
7054     p = pbeg = XSTRING (start)->data;
7055   else
7056     p = pbeg = BYTE_POS_ADDR (start_byte);
7057   pend = p + (end_byte - start_byte);
7058
7059   while (p < pend && ASCII_BYTE_P (*p)) p++, pos++;
7060   while (p < pend && ASCII_BYTE_P (*(pend - 1))) pend--;
7061
7062   while (p < pend)
7063     {
7064       if (ASCII_BYTE_P (*p))
7065         p++;
7066       else
7067         {
7068           c = STRING_CHAR_ADVANCE (p);
7069
7070           charset_map_loaded = 0;
7071           for (tail = list; CONSP (tail); tail = XCDR (tail))
7072             {
7073               elt = XCDR (XCAR (tail));
7074               if (! char_encodable_p (c, XCAR (elt)))
7075                 XSETCDR (elt, Fcons (make_number (pos), XCDR (elt)));
7076             }
7077           if (charset_map_loaded)
7078             {
7079               EMACS_INT p_offset = p - pbeg, pend_offset = pend - pbeg;
7080
7081               if (STRINGP (start))
7082                 pbeg = XSTRING (start)->data;
7083               else
7084                 pbeg = BYTE_POS_ADDR (start_byte);
7085               p = pbeg + p_offset;
7086               pend = pbeg + pend_offset;
7087             }
7088         }
7089       pos++;
7090     }
7091
7092   tail = list;
7093   list = Qnil;
7094   for (; CONSP (tail); tail = XCDR (tail))
7095     {
7096       elt = XCAR (tail);
7097       if (CONSP (XCDR (XCDR (elt))))
7098         list = Fcons (Fcons (XCAR (elt), Fnreverse (XCDR (XCDR (elt)))),
7099                       list);
7100     }
7101
7102   return list;
7103 }
7104
7105
7106
7107 Lisp_Object
7108 code_convert_region (start, end, coding_system, dst_object, encodep, norecord)
7109      Lisp_Object start, end, coding_system, dst_object;
7110      int encodep, norecord;
7111 {
7112   struct coding_system coding;
7113   EMACS_INT from, from_byte, to, to_byte;
7114   Lisp_Object src_object;
7115
7116   CHECK_NUMBER_COERCE_MARKER (start);
7117   CHECK_NUMBER_COERCE_MARKER (end);
7118   if (NILP (coding_system))
7119     coding_system = Qno_conversion;
7120   else
7121     CHECK_CODING_SYSTEM (coding_system);
7122   src_object = Fcurrent_buffer ();
7123   if (NILP (dst_object))
7124     dst_object = src_object;
7125   else if (! EQ (dst_object, Qt))
7126     CHECK_BUFFER (dst_object);
7127
7128   validate_region (&start, &end);
7129   from = XFASTINT (start);
7130   from_byte = CHAR_TO_BYTE (from);
7131   to = XFASTINT (end);
7132   to_byte = CHAR_TO_BYTE (to);
7133
7134   setup_coding_system (coding_system, &coding);
7135   coding.mode |= CODING_MODE_LAST_BLOCK;
7136
7137   if (encodep)
7138     encode_coding_object (&coding, src_object, from, from_byte, to, to_byte,
7139                           dst_object);
7140   else
7141     decode_coding_object (&coding, src_object, from, from_byte, to, to_byte,
7142                           dst_object);
7143   if (! norecord)
7144     Vlast_coding_system_used = CODING_ID_NAME (coding.id);
7145
7146   if (coding.result != CODING_RESULT_SUCCESS)
7147     error ("Code conversion error: %d", coding.result);
7148
7149   return (BUFFERP (dst_object)
7150           ? make_number (coding.produced_char)
7151           : coding.dst_object);
7152 }
7153
7154
7155 DEFUN ("decode-coding-region", Fdecode_coding_region, Sdecode_coding_region,
7156        3, 4, "r\nzCoding system: ",
7157        doc: /* Decode the current region from the specified coding system.
7158 When called from a program, takes four arguments:
7159         START, END, CODING-SYSTEM, and DESTINATION.
7160 START and END are buffer positions.
7161
7162 Optional 4th arguments DESTINATION specifies where the decoded text goes.
7163 If nil, the region between START and END is replace by the decoded text.
7164 If buffer, the decoded text is inserted in the buffer.
7165 If t, the decoded text is returned.
7166
7167 This function sets `last-coding-system-used' to the precise coding system
7168 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
7169 not fully specified.)
7170 It returns the length of the decoded text.  */)
7171      (start, end, coding_system, destination)
7172      Lisp_Object start, end, coding_system, destination;
7173 {
7174   return code_convert_region (start, end, coding_system, destination, 0, 0);
7175 }
7176
7177 DEFUN ("encode-coding-region", Fencode_coding_region, Sencode_coding_region,
7178        3, 4, "r\nzCoding system: ",
7179        doc: /* Encode the current region by specified coding system.
7180 When called from a program, takes three arguments:
7181 START, END, and CODING-SYSTEM.  START and END are buffer positions.
7182
7183 Optional 4th arguments DESTINATION specifies where the encoded text goes.
7184 If nil, the region between START and END is replace by the encoded text.
7185 If buffer, the encoded text is inserted in the buffer.
7186 If t, the encoded text is returned.
7187
7188 This function sets `last-coding-system-used' to the precise coding system
7189 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
7190 not fully specified.)
7191 It returns the length of the encoded text.  */)
7192   (start, end, coding_system, destination)
7193      Lisp_Object start, end, coding_system, destination;
7194 {
7195   return code_convert_region (start, end, coding_system, destination, 1, 0);
7196 }
7197
7198 Lisp_Object
7199 code_convert_string (string, coding_system, dst_object,
7200                      encodep, nocopy, norecord)
7201      Lisp_Object string, coding_system, dst_object;
7202      int encodep, nocopy, norecord;
7203 {
7204   struct coding_system coding;
7205   EMACS_INT chars, bytes;
7206
7207   CHECK_STRING (string);
7208   if (NILP (coding_system))
7209     {
7210       if (! norecord)
7211         Vlast_coding_system_used = Qno_conversion;
7212       if (NILP (dst_object))
7213         return (nocopy ? Fcopy_sequence (string) : string);
7214     }
7215
7216   if (NILP (coding_system))
7217     coding_system = Qno_conversion;
7218   else
7219     CHECK_CODING_SYSTEM (coding_system);
7220   if (NILP (dst_object))
7221     dst_object = Qt;
7222   else if (! EQ (dst_object, Qt))
7223     CHECK_BUFFER (dst_object);
7224
7225   setup_coding_system (coding_system, &coding);
7226   coding.mode |= CODING_MODE_LAST_BLOCK;
7227   chars = XSTRING (string)->size;
7228   bytes = STRING_BYTES (XSTRING (string));
7229   if (encodep)
7230     encode_coding_object (&coding, string, 0, 0, chars, bytes, dst_object);
7231   else
7232     decode_coding_object (&coding, string, 0, 0, chars, bytes, dst_object);
7233   if (! norecord)
7234     Vlast_coding_system_used = CODING_ID_NAME (coding.id);
7235
7236   if (coding.result != CODING_RESULT_SUCCESS)
7237     error ("Code conversion error: %d", coding.result);
7238
7239   return (BUFFERP (dst_object)
7240           ? make_number (coding.produced_char)
7241           : coding.dst_object);
7242 }
7243
7244
7245 /* Encode or decode STRING according to CODING_SYSTEM.
7246    Do not set Vlast_coding_system_used.
7247
7248    This function is called only from macros DECODE_FILE and
7249    ENCODE_FILE, thus we ignore character composition.  */
7250
7251 Lisp_Object
7252 code_convert_string_norecord (string, coding_system, encodep)
7253      Lisp_Object string, coding_system;
7254      int encodep;
7255 {
7256   return code_convert_string (string, coding_system, Qt, encodep, 0, 1);
7257 }
7258
7259
7260 DEFUN ("decode-coding-string", Fdecode_coding_string, Sdecode_coding_string,
7261        2, 4, 0,
7262        doc: /* Decode STRING which is encoded in CODING-SYSTEM, and return the result.
7263
7264 Optional third arg NOCOPY non-nil means it is OK to return STRING itself
7265 if the decoding operation is trivial.
7266
7267 Optional fourth arg BUFFER non-nil meant that the decoded text is
7268 inserted in BUFFER instead of returned as a string.  In this case,
7269 the return value is BUFFER.
7270
7271 This function sets `last-coding-system-used' to the precise coding system
7272 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
7273 not fully specified.  */)
7274   (string, coding_system, nocopy, buffer)
7275      Lisp_Object string, coding_system, nocopy, buffer;
7276 {
7277   return code_convert_string (string, coding_system, buffer,
7278                               0, ! NILP (nocopy), 0);
7279 }
7280
7281 DEFUN ("encode-coding-string", Fencode_coding_string, Sencode_coding_string,
7282        2, 4, 0,
7283        doc: /* Encode STRING to CODING-SYSTEM, and return the result.
7284
7285 Optional third arg NOCOPY non-nil means it is OK to return STRING
7286 itself if the encoding operation is trivial.
7287
7288 Optional fourth arg BUFFER non-nil meant that the encoded text is
7289 inserted in BUFFER instead of returned as a string.  In this case,
7290 the return value is BUFFER.
7291
7292 This function sets `last-coding-system-used' to the precise coding system
7293 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
7294 not fully specified.)  */)
7295      (string, coding_system, nocopy, buffer)
7296      Lisp_Object string, coding_system, nocopy, buffer;
7297 {
7298   return code_convert_string (string, coding_system, buffer,
7299                               1, ! NILP (nocopy), 1);
7300 }
7301
7302 \f
7303 DEFUN ("decode-sjis-char", Fdecode_sjis_char, Sdecode_sjis_char, 1, 1, 0,
7304        doc: /* Decode a Japanese character which has CODE in shift_jis encoding.
7305 Return the corresponding character.  */)
7306      (code)
7307      Lisp_Object code;
7308 {
7309   Lisp_Object spec, attrs, val;
7310   struct charset *charset_roman, *charset_kanji, *charset_kana, *charset;
7311   int c;
7312
7313   CHECK_NATNUM (code);
7314   c = XFASTINT (code);
7315   CHECK_CODING_SYSTEM_GET_SPEC (Vsjis_coding_system, spec);
7316   attrs = AREF (spec, 0);
7317
7318   if (ASCII_BYTE_P (c)
7319       && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
7320     return code;
7321
7322   val = CODING_ATTR_CHARSET_LIST (attrs);
7323   charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
7324   charset_kana = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
7325   charset_kanji = CHARSET_FROM_ID (XINT (XCAR (val)));
7326
7327   if (c <= 0x7F)
7328     charset = charset_roman;
7329   else if (c >= 0xA0 && c < 0xDF)
7330     {
7331       charset = charset_kana;
7332       c -= 0x80;
7333     }
7334   else
7335     {
7336       int s1 = c >> 8, s2 = c & 0xFF;
7337
7338       if (s1 < 0x81 || (s1 > 0x9F && s1 < 0xE0) || s1 > 0xEF
7339           || s2 < 0x40 || s2 == 0x7F || s2 > 0xFC)
7340         error ("Invalid code: %d", code);
7341       SJIS_TO_JIS (c);
7342       charset = charset_kanji;
7343     }
7344   c = DECODE_CHAR (charset, c);
7345   if (c < 0)
7346     error ("Invalid code: %d", code);
7347   return make_number (c);
7348 }
7349
7350
7351 DEFUN ("encode-sjis-char", Fencode_sjis_char, Sencode_sjis_char, 1, 1, 0,
7352        doc: /* Encode a Japanese character CHAR to shift_jis encoding.
7353 Return the corresponding code in SJIS.  */)
7354      (ch)
7355     Lisp_Object ch;
7356 {
7357   Lisp_Object spec, attrs, charset_list;
7358   int c;
7359   struct charset *charset;
7360   unsigned code;
7361
7362   CHECK_CHARACTER (ch);
7363   c = XFASTINT (ch);
7364   CHECK_CODING_SYSTEM_GET_SPEC (Vsjis_coding_system, spec);
7365   attrs = AREF (spec, 0);
7366
7367   if (ASCII_CHAR_P (c)
7368       && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
7369     return ch;
7370
7371   charset_list = CODING_ATTR_CHARSET_LIST (attrs);
7372   charset = char_charset (c, charset_list, &code);
7373   if (code == CHARSET_INVALID_CODE (charset))
7374     error ("Can't encode by shift_jis encoding: %d", c);
7375   JIS_TO_SJIS (code);
7376
7377   return make_number (code);
7378 }
7379
7380 DEFUN ("decode-big5-char", Fdecode_big5_char, Sdecode_big5_char, 1, 1, 0,
7381        doc: /* Decode a Big5 character which has CODE in BIG5 coding system.
7382 Return the corresponding character.  */)
7383      (code)
7384      Lisp_Object code;
7385 {
7386   Lisp_Object spec, attrs, val;
7387   struct charset *charset_roman, *charset_big5, *charset;
7388   int c;
7389
7390   CHECK_NATNUM (code);
7391   c = XFASTINT (code);
7392   CHECK_CODING_SYSTEM_GET_SPEC (Vbig5_coding_system, spec);
7393   attrs = AREF (spec, 0);
7394
7395   if (ASCII_BYTE_P (c)
7396       && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
7397     return code;
7398
7399   val = CODING_ATTR_CHARSET_LIST (attrs);
7400   charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
7401   charset_big5 = CHARSET_FROM_ID (XINT (XCAR (val)));
7402
7403   if (c <= 0x7F)
7404     charset = charset_roman;
7405   else
7406     {
7407       int b1 = c >> 8, b2 = c & 0x7F;
7408       if (b1 < 0xA1 || b1 > 0xFE
7409           || b2 < 0x40 || (b2 > 0x7E && b2 < 0xA1) || b2 > 0xFE)
7410         error ("Invalid code: %d", code);
7411       charset = charset_big5;
7412     }
7413   c = DECODE_CHAR (charset, (unsigned )c);
7414   if (c < 0)
7415     error ("Invalid code: %d", code);
7416   return make_number (c);
7417 }
7418
7419 DEFUN ("encode-big5-char", Fencode_big5_char, Sencode_big5_char, 1, 1, 0,
7420        doc: /* Encode the Big5 character CHAR to BIG5 coding system.
7421 Return the corresponding character code in Big5.  */)
7422      (ch)
7423      Lisp_Object ch;
7424 {
7425   Lisp_Object spec, attrs, charset_list;
7426   struct charset *charset;
7427   int c;
7428   unsigned code;
7429
7430   CHECK_CHARACTER (ch);
7431   c = XFASTINT (ch);
7432   CHECK_CODING_SYSTEM_GET_SPEC (Vbig5_coding_system, spec);
7433   attrs = AREF (spec, 0);
7434   if (ASCII_CHAR_P (c)
7435       && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
7436     return ch;
7437
7438   charset_list = CODING_ATTR_CHARSET_LIST (attrs);
7439   charset = char_charset (c, charset_list, &code);
7440   if (code == CHARSET_INVALID_CODE (charset))
7441     error ("Can't encode by Big5 encoding: %d", c);
7442
7443   return make_number (code);
7444 }
7445
7446 \f
7447 DEFUN ("set-terminal-coding-system-internal",
7448        Fset_terminal_coding_system_internal,
7449        Sset_terminal_coding_system_internal, 1, 1, 0,
7450        doc: /* Internal use only.  */)
7451      (coding_system)
7452      Lisp_Object coding_system;
7453 {
7454   CHECK_SYMBOL (coding_system);
7455   setup_coding_system (Fcheck_coding_system (coding_system),
7456                         &terminal_coding);
7457
7458   /* We had better not send unsafe characters to terminal.  */
7459   terminal_coding.mode |= CODING_MODE_SAFE_ENCODING;
7460   /* Characer composition should be disabled.  */
7461   terminal_coding.common_flags &= ~CODING_ANNOTATE_COMPOSITION_MASK;
7462   terminal_coding.src_multibyte = 1;
7463   terminal_coding.dst_multibyte = 0;
7464   return Qnil;
7465 }
7466
7467 DEFUN ("set-safe-terminal-coding-system-internal",
7468        Fset_safe_terminal_coding_system_internal,
7469        Sset_safe_terminal_coding_system_internal, 1, 1, 0,
7470        doc: /* Internal use only.  */)
7471      (coding_system)
7472      Lisp_Object coding_system;
7473 {
7474   CHECK_SYMBOL (coding_system);
7475   setup_coding_system (Fcheck_coding_system (coding_system),
7476                        &safe_terminal_coding);
7477   /* Characer composition should be disabled.  */
7478   safe_terminal_coding.common_flags &= ~CODING_ANNOTATE_COMPOSITION_MASK;
7479   safe_terminal_coding.src_multibyte = 1;
7480   safe_terminal_coding.dst_multibyte = 0;
7481   return Qnil;
7482 }
7483
7484 DEFUN ("terminal-coding-system",
7485        Fterminal_coding_system, Sterminal_coding_system, 0, 0, 0,
7486        doc: /* Return coding system specified for terminal output.  */)
7487      ()
7488 {
7489   return CODING_ID_NAME (terminal_coding.id);
7490 }
7491
7492 DEFUN ("set-keyboard-coding-system-internal",
7493        Fset_keyboard_coding_system_internal,
7494        Sset_keyboard_coding_system_internal, 1, 1, 0,
7495        doc: /* Internal use only.  */)
7496      (coding_system)
7497      Lisp_Object coding_system;
7498 {
7499   CHECK_SYMBOL (coding_system);
7500   setup_coding_system (Fcheck_coding_system (coding_system),
7501                        &keyboard_coding);
7502   /* Characer composition should be disabled.  */
7503   keyboard_coding.common_flags &= ~CODING_ANNOTATE_COMPOSITION_MASK;
7504   return Qnil;
7505 }
7506
7507 DEFUN ("keyboard-coding-system",
7508        Fkeyboard_coding_system, Skeyboard_coding_system, 0, 0, 0,
7509        doc: /* Return coding system specified for decoding keyboard input.  */)
7510      ()
7511 {
7512   return CODING_ID_NAME (keyboard_coding.id);
7513 }
7514
7515 \f
7516 DEFUN ("find-operation-coding-system", Ffind_operation_coding_system,
7517        Sfind_operation_coding_system,  1, MANY, 0,
7518        doc: /* Choose a coding system for an operation based on the target name.
7519 The value names a pair of coding systems: (DECODING-SYSTEM . ENCODING-SYSTEM).
7520 DECODING-SYSTEM is the coding system to use for decoding
7521 \(in case OPERATION does decoding), and ENCODING-SYSTEM is the coding system
7522 for encoding (in case OPERATION does encoding).
7523
7524 The first argument OPERATION specifies an I/O primitive:
7525   For file I/O, `insert-file-contents' or `write-region'.
7526   For process I/O, `call-process', `call-process-region', or `start-process'.
7527   For network I/O, `open-network-stream'.
7528
7529 The remaining arguments should be the same arguments that were passed
7530 to the primitive.  Depending on which primitive, one of those arguments
7531 is selected as the TARGET.  For example, if OPERATION does file I/O,
7532 whichever argument specifies the file name is TARGET.
7533
7534 TARGET has a meaning which depends on OPERATION:
7535   For file I/O, TARGET is a file name.
7536   For process I/O, TARGET is a process name.
7537   For network I/O, TARGET is a service name or a port number
7538
7539 This function looks up what specified for TARGET in,
7540 `file-coding-system-alist', `process-coding-system-alist',
7541 or `network-coding-system-alist' depending on OPERATION.
7542 They may specify a coding system, a cons of coding systems,
7543 or a function symbol to call.
7544 In the last case, we call the function with one argument,
7545 which is a list of all the arguments given to this function.
7546
7547 usage: (find-operation-coding-system OPERATION ARGUMENTS ...)  */)
7548      (nargs, args)
7549      int nargs;
7550      Lisp_Object *args;
7551 {
7552   Lisp_Object operation, target_idx, target, val;
7553   register Lisp_Object chain;
7554
7555   if (nargs < 2)
7556     error ("Too few arguments");
7557   operation = args[0];
7558   if (!SYMBOLP (operation)
7559       || !INTEGERP (target_idx = Fget (operation, Qtarget_idx)))
7560     error ("Invalid first arguement");
7561   if (nargs < 1 + XINT (target_idx))
7562     error ("Too few arguments for operation: %s",
7563            XSYMBOL (operation)->name->data);
7564   target = args[XINT (target_idx) + 1];
7565   if (!(STRINGP (target)
7566         || (EQ (operation, Qopen_network_stream) && INTEGERP (target))))
7567     error ("Invalid %dth argument", XINT (target_idx) + 1);
7568
7569   chain = ((EQ (operation, Qinsert_file_contents)
7570             || EQ (operation, Qwrite_region))
7571            ? Vfile_coding_system_alist
7572            : (EQ (operation, Qopen_network_stream)
7573               ? Vnetwork_coding_system_alist
7574               : Vprocess_coding_system_alist));
7575   if (NILP (chain))
7576     return Qnil;
7577
7578   for (; CONSP (chain); chain = XCDR (chain))
7579     {
7580       Lisp_Object elt;
7581
7582       elt = XCAR (chain);
7583       if (CONSP (elt)
7584           && ((STRINGP (target)
7585                && STRINGP (XCAR (elt))
7586                && fast_string_match (XCAR (elt), target) >= 0)
7587               || (INTEGERP (target) && EQ (target, XCAR (elt)))))
7588         {
7589           val = XCDR (elt);
7590           /* Here, if VAL is both a valid coding system and a valid
7591              function symbol, we return VAL as a coding system.  */
7592           if (CONSP (val))
7593             return val;
7594           if (! SYMBOLP (val))
7595             return Qnil;
7596           if (! NILP (Fcoding_system_p (val)))
7597             return Fcons (val, val);
7598           if (! NILP (Ffboundp (val)))
7599             {
7600               val = call1 (val, Flist (nargs, args));
7601               if (CONSP (val))
7602                 return val;
7603               if (SYMBOLP (val) && ! NILP (Fcoding_system_p (val)))
7604                 return Fcons (val, val);
7605             }
7606           return Qnil;
7607         }
7608     }
7609   return Qnil;
7610 }
7611
7612 DEFUN ("set-coding-system-priority", Fset_coding_system_priority,
7613        Sset_coding_system_priority, 0, MANY, 0,
7614        doc: /* Assign higher priority to the coding systems given as arguments.
7615 usage: (set-coding-system-priority CODING-SYSTEM ...)  */)
7616      (nargs, args)
7617      int nargs;
7618      Lisp_Object *args;
7619 {
7620   int i, j;
7621   int changed[coding_category_max];
7622   enum coding_category priorities[coding_category_max];
7623
7624   bzero (changed, sizeof changed);
7625
7626   for (i = j = 0; i < nargs; i++)
7627     {
7628       enum coding_category category;
7629       Lisp_Object spec, attrs;
7630
7631       CHECK_CODING_SYSTEM_GET_SPEC (args[i], spec);
7632       attrs = AREF (spec, 0);
7633       category = XINT (CODING_ATTR_CATEGORY (attrs));
7634       if (changed[category])
7635         /* Ignore this coding system because a coding system of the
7636            same category already had a higher priority.  */
7637         continue;
7638       changed[category] = 1;
7639       priorities[j++] = category;
7640       if (coding_categories[category].id >= 0
7641           && ! EQ (args[i], CODING_ID_NAME (coding_categories[category].id)))
7642         setup_coding_system (args[i], &coding_categories[category]);
7643     }
7644
7645   /* Now we have decided top J priorities.  Reflect the order of the
7646      original priorities to the remaining priorities.  */
7647
7648   for (i = j, j = 0; i < coding_category_max; i++, j++)
7649     {
7650       while (j < coding_category_max
7651              && changed[coding_priorities[j]])
7652         j++;
7653       if (j == coding_category_max)
7654         abort ();
7655       priorities[i] = coding_priorities[j];
7656     }
7657
7658   bcopy (priorities, coding_priorities, sizeof priorities);
7659   return Qnil;
7660 }
7661
7662 DEFUN ("coding-system-priority-list", Fcoding_system_priority_list,
7663        Scoding_system_priority_list, 0, 1, 0,
7664        doc: /* Return a list of coding systems ordered by their priorities.
7665 HIGHESTP non-nil means just return the highest priority one.  */)
7666      (highestp)
7667      Lisp_Object highestp;
7668 {
7669   int i;
7670   Lisp_Object val;
7671
7672   for (i = 0, val = Qnil; i < coding_category_max; i++)
7673     {
7674       enum coding_category category = coding_priorities[i];
7675       int id = coding_categories[category].id;
7676       Lisp_Object attrs;
7677
7678       if (id < 0)
7679         continue;
7680       attrs = CODING_ID_ATTRS (id);
7681       if (! NILP (highestp))
7682         return CODING_ATTR_BASE_NAME (attrs);
7683       val = Fcons (CODING_ATTR_BASE_NAME (attrs), val);
7684     }
7685   return Fnreverse (val);
7686 }
7687
7688 static char *suffixes[] = { "-unix", "-dos", "-mac" };
7689
7690 static Lisp_Object
7691 make_subsidiaries (base)
7692      Lisp_Object base;
7693 {
7694   Lisp_Object subsidiaries;
7695   int base_name_len = STRING_BYTES (XSYMBOL (base)->name);
7696   char *buf = (char *) alloca (base_name_len + 6);
7697   int i;
7698
7699   bcopy (XSYMBOL (base)->name->data, buf, base_name_len);
7700   subsidiaries = Fmake_vector (make_number (3), Qnil);
7701   for (i = 0; i < 3; i++)
7702     {
7703       bcopy (suffixes[i], buf + base_name_len, strlen (suffixes[i]) + 1);
7704       ASET (subsidiaries, i, intern (buf));
7705     }
7706   return subsidiaries;
7707 }
7708
7709
7710 DEFUN ("define-coding-system-internal", Fdefine_coding_system_internal,
7711        Sdefine_coding_system_internal, coding_arg_max, MANY, 0,
7712        doc: /* For internal use only.
7713 usage: (define-coding-system-internal ...)  */)
7714      (nargs, args)
7715      int nargs;
7716      Lisp_Object *args;
7717 {
7718   Lisp_Object name;
7719   Lisp_Object spec_vec;         /* [ ATTRS ALIASE EOL_TYPE ] */
7720   Lisp_Object attrs;            /* Vector of attributes.  */
7721   Lisp_Object eol_type;
7722   Lisp_Object aliases;
7723   Lisp_Object coding_type, charset_list, safe_charsets;
7724   enum coding_category category;
7725   Lisp_Object tail, val;
7726   int max_charset_id = 0;
7727   int i;
7728
7729   if (nargs < coding_arg_max)
7730     goto short_args;
7731
7732   attrs = Fmake_vector (make_number (coding_attr_last_index), Qnil);
7733
7734   name = args[coding_arg_name];
7735   CHECK_SYMBOL (name);
7736   CODING_ATTR_BASE_NAME (attrs) = name;
7737
7738   val = args[coding_arg_mnemonic];
7739   if (! STRINGP (val))
7740     CHECK_CHARACTER (val);
7741   CODING_ATTR_MNEMONIC (attrs) = val;
7742
7743   coding_type = args[coding_arg_coding_type];
7744   CHECK_SYMBOL (coding_type);
7745   CODING_ATTR_TYPE (attrs) = coding_type;
7746
7747   charset_list = args[coding_arg_charset_list];
7748   if (SYMBOLP (charset_list))
7749     {
7750       if (EQ (charset_list, Qiso_2022))
7751         {
7752           if (! EQ (coding_type, Qiso_2022))
7753             error ("Invalid charset-list");
7754           charset_list = Viso_2022_charset_list;
7755         }
7756       else if (EQ (charset_list, Qemacs_mule))
7757         {
7758           if (! EQ (coding_type, Qemacs_mule))
7759             error ("Invalid charset-list");
7760           charset_list = Vemacs_mule_charset_list;
7761         }
7762       for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
7763         if (max_charset_id < XFASTINT (XCAR (tail)))
7764           max_charset_id = XFASTINT (XCAR (tail));
7765     }
7766   else
7767     {
7768       charset_list = Fcopy_sequence (charset_list);
7769       for (tail = charset_list; !NILP (tail); tail = Fcdr (tail))
7770         {
7771           struct charset *charset;
7772
7773           val = Fcar (tail);
7774           CHECK_CHARSET_GET_CHARSET (val, charset);
7775           if (EQ (coding_type, Qiso_2022)
7776               ? CHARSET_ISO_FINAL (charset) < 0
7777               : EQ (coding_type, Qemacs_mule)
7778               ? CHARSET_EMACS_MULE_ID (charset) < 0
7779               : 0)
7780             error ("Can't handle charset `%s'",
7781                    XSYMBOL (CHARSET_NAME (charset))->name->data);
7782
7783           XCAR (tail) = make_number (charset->id);
7784           if (max_charset_id < charset->id)
7785             max_charset_id = charset->id;
7786         }
7787     }
7788   CODING_ATTR_CHARSET_LIST (attrs) = charset_list;
7789
7790   safe_charsets = Fmake_string (make_number (max_charset_id + 1),
7791                                 make_number (255));
7792   for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
7793     XSTRING (safe_charsets)->data[XFASTINT (XCAR (tail))] = 0;
7794   CODING_ATTR_SAFE_CHARSETS (attrs) = safe_charsets;
7795
7796   CODING_ATTR_ASCII_COMPAT (attrs) = args[coding_arg_ascii_compatible_p];
7797
7798   val = args[coding_arg_decode_translation_table];
7799   if (! NILP (val))
7800     CHECK_CHAR_TABLE (val);
7801   CODING_ATTR_DECODE_TBL (attrs) = val;
7802
7803   val = args[coding_arg_encode_translation_table];
7804   if (! NILP (val))
7805     CHECK_CHAR_TABLE (val);
7806   CODING_ATTR_ENCODE_TBL (attrs) = val;
7807
7808   val = args[coding_arg_post_read_conversion];
7809   CHECK_SYMBOL (val);
7810   CODING_ATTR_POST_READ (attrs) = val;
7811
7812   val = args[coding_arg_pre_write_conversion];
7813   CHECK_SYMBOL (val);
7814   CODING_ATTR_PRE_WRITE (attrs) = val;
7815
7816   val = args[coding_arg_default_char];
7817   if (NILP (val))
7818     CODING_ATTR_DEFAULT_CHAR (attrs) = make_number (' ');
7819   else
7820     {
7821       CHECK_CHARACTER (val);
7822       CODING_ATTR_DEFAULT_CHAR (attrs) = val;
7823     }
7824
7825   val = args[coding_arg_plist];
7826   CHECK_LIST (val);
7827   CODING_ATTR_PLIST (attrs) = val;
7828
7829   if (EQ (coding_type, Qcharset))
7830     {
7831       /* Generate a lisp vector of 256 elements.  Each element is nil,
7832          integer, or a list of charset IDs.
7833
7834          If Nth element is nil, the byte code N is invalid in this
7835          coding system.
7836
7837          If Nth element is a number NUM, N is the first byte of a
7838          charset whose ID is NUM.
7839
7840          If Nth element is a list of charset IDs, N is the first byte
7841          of one of them.  The list is sorted by dimensions of the
7842          charsets.  A charset of smaller dimension comes firtst.
7843       */
7844       val = Fmake_vector (make_number (256), Qnil);
7845
7846       for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
7847         {
7848           struct charset *charset = CHARSET_FROM_ID (XFASTINT (XCAR (tail)));
7849           int dim = CHARSET_DIMENSION (charset);
7850           int idx = (dim - 1) * 4;
7851
7852           if (CHARSET_ASCII_COMPATIBLE_P (charset))
7853             CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
7854
7855           for (i = charset->code_space[idx];
7856                i <= charset->code_space[idx + 1]; i++)
7857             {
7858               Lisp_Object tmp, tmp2;
7859               int dim2;
7860
7861               tmp = AREF (val, i);
7862               if (NILP (tmp))
7863                 tmp = XCAR (tail);
7864               else if (NUMBERP (tmp))
7865                 {
7866                   dim2 = CHARSET_DIMENSION (CHARSET_FROM_ID (XFASTINT (tmp)));
7867                   if (dim < dim2)
7868                     tmp = Fcons (XCAR (tail), Fcons (tmp, Qnil));
7869                   else
7870                     tmp = Fcons (tmp, Fcons (XCAR (tail), Qnil));
7871                 }
7872               else
7873                 {
7874                   for (tmp2 = tmp; CONSP (tmp2); tmp2 = XCDR (tmp2))
7875                     {
7876                       dim2 = CHARSET_DIMENSION (CHARSET_FROM_ID (XFASTINT (XCAR (tmp2))));
7877                       if (dim < dim2)
7878                         break;
7879                     }
7880                   if (NILP (tmp2))
7881                     tmp = nconc2 (tmp, Fcons (XCAR (tail), Qnil));
7882                   else
7883                     {
7884                       XSETCDR (tmp2, Fcons (XCAR (tmp2), XCDR (tmp2)));
7885                       XSETCAR (tmp2, XCAR (tail));
7886                     }
7887                 }
7888               ASET (val, i, tmp);
7889             }
7890         }
7891       ASET (attrs, coding_attr_charset_valids, val);
7892       category = coding_category_charset;
7893     }
7894   else if (EQ (coding_type, Qccl))
7895     {
7896       Lisp_Object valids;
7897
7898       if (nargs < coding_arg_ccl_max)
7899         goto short_args;
7900
7901       val = args[coding_arg_ccl_decoder];
7902       CHECK_CCL_PROGRAM (val);
7903       if (VECTORP (val))
7904         val = Fcopy_sequence (val);
7905       ASET (attrs, coding_attr_ccl_decoder, val);
7906
7907       val = args[coding_arg_ccl_encoder];
7908       CHECK_CCL_PROGRAM (val);
7909       if (VECTORP (val))
7910         val = Fcopy_sequence (val);
7911       ASET (attrs, coding_attr_ccl_encoder, val);
7912
7913       val = args[coding_arg_ccl_valids];
7914       valids = Fmake_string (make_number (256), make_number (0));
7915       for (tail = val; !NILP (tail); tail = Fcdr (tail))
7916         {
7917           int from, to;
7918
7919           val = Fcar (tail);
7920           if (INTEGERP (val))
7921             {
7922               from = to = XINT (val);
7923               if (from < 0 || from > 255)
7924                 args_out_of_range_3 (val, make_number (0), make_number (255));
7925             }
7926           else
7927             {
7928               CHECK_CONS (val);
7929               CHECK_NUMBER (XCAR (val));
7930               CHECK_NUMBER (XCDR (val));
7931               from = XINT (XCAR (val));
7932               if (from < 0 || from > 255)
7933                 args_out_of_range_3 (XCAR (val),
7934                                      make_number (0), make_number (255));
7935               to = XINT (XCDR (val));
7936               if (to < from || to > 255)
7937                 args_out_of_range_3 (XCDR (val),
7938                                      XCAR (val), make_number (255));
7939             }
7940           for (i = from; i <= to; i++)
7941             XSTRING (valids)->data[i] = 1;
7942         }
7943       ASET (attrs, coding_attr_ccl_valids, valids);
7944
7945       category = coding_category_ccl;
7946     }
7947   else if (EQ (coding_type, Qutf_16))
7948     {
7949       Lisp_Object bom, endian;
7950
7951       CODING_ATTR_ASCII_COMPAT (attrs) = Qnil;
7952
7953       if (nargs < coding_arg_utf16_max)
7954         goto short_args;
7955
7956       bom = args[coding_arg_utf16_bom];
7957       if (! NILP (bom) && ! EQ (bom, Qt))
7958         {
7959           CHECK_CONS (bom);
7960           CHECK_CODING_SYSTEM (XCAR (bom));
7961           CHECK_CODING_SYSTEM (XCDR (bom));
7962         }
7963       ASET (attrs, coding_attr_utf_16_bom, bom);
7964
7965       endian = args[coding_arg_utf16_endian];
7966       ASET (attrs, coding_attr_utf_16_endian, endian);
7967
7968       category = (CONSP (bom)
7969                   ? coding_category_utf_16_auto
7970                   : NILP (bom)
7971                   ? (NILP (endian)
7972                      ? coding_category_utf_16_be_nosig
7973                      : coding_category_utf_16_le_nosig)
7974                   : (NILP (endian)
7975                      ? coding_category_utf_16_be
7976                      : coding_category_utf_16_le));
7977     }
7978   else if (EQ (coding_type, Qiso_2022))
7979     {
7980       Lisp_Object initial, reg_usage, request, flags;
7981       int i, id;
7982
7983       if (nargs < coding_arg_iso2022_max)
7984         goto short_args;
7985
7986       initial = Fcopy_sequence (args[coding_arg_iso2022_initial]);
7987       CHECK_VECTOR (initial);
7988       for (i = 0; i < 4; i++)
7989         {
7990           val = Faref (initial, make_number (i));
7991           if (! NILP (val))
7992             {
7993               struct charset *charset;
7994
7995               CHECK_CHARSET_GET_CHARSET (val, charset);
7996               ASET (initial, i, make_number (CHARSET_ID (charset)));
7997               if (i == 0 && CHARSET_ASCII_COMPATIBLE_P (charset))
7998                 CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
7999             }
8000           else
8001             ASET (initial, i, make_number (-1));
8002         }
8003
8004       reg_usage = args[coding_arg_iso2022_reg_usage];
8005       CHECK_CONS (reg_usage);
8006       CHECK_NATNUM (XCAR (reg_usage));
8007       CHECK_NATNUM (XCDR (reg_usage));
8008
8009       request = Fcopy_sequence (args[coding_arg_iso2022_request]);
8010       for (tail = request; ! NILP (tail); tail = Fcdr (tail))
8011         {
8012           int id;
8013
8014           val = Fcar (tail);
8015           CHECK_CONS (val);
8016           CHECK_CHARSET_GET_ID (XCAR (val), id);
8017           CHECK_NATNUM (XCDR (val));
8018           if (XINT (XCDR (val)) >= 4)
8019             error ("Invalid graphic register number: %d", XINT (XCDR (val)));
8020           XCAR (val) = make_number (id);
8021         }
8022
8023       flags = args[coding_arg_iso2022_flags];
8024       CHECK_NATNUM (flags);
8025       i = XINT (flags);
8026       if (EQ (args[coding_arg_charset_list], Qiso_2022))
8027         flags = make_number (i | CODING_ISO_FLAG_FULL_SUPPORT);
8028
8029       ASET (attrs, coding_attr_iso_initial, initial);
8030       ASET (attrs, coding_attr_iso_usage, reg_usage);
8031       ASET (attrs, coding_attr_iso_request, request);
8032       ASET (attrs, coding_attr_iso_flags, flags);
8033       setup_iso_safe_charsets (attrs);
8034
8035       if (i & CODING_ISO_FLAG_SEVEN_BITS)
8036         category = ((i & (CODING_ISO_FLAG_LOCKING_SHIFT
8037                           | CODING_ISO_FLAG_SINGLE_SHIFT))
8038                     ? coding_category_iso_7_else
8039                     : EQ (args[coding_arg_charset_list], Qiso_2022)
8040                     ? coding_category_iso_7
8041                     : coding_category_iso_7_tight);
8042       else
8043         {
8044           int id = XINT (AREF (initial, 1));
8045
8046           category = (((i & CODING_ISO_FLAG_LOCKING_SHIFT)
8047                        || EQ (args[coding_arg_charset_list], Qiso_2022)
8048                        || id < 0)
8049                       ? coding_category_iso_8_else
8050                       : (CHARSET_DIMENSION (CHARSET_FROM_ID (id)) == 1)
8051                       ? coding_category_iso_8_1
8052                       : coding_category_iso_8_2);
8053         }
8054       if (category != coding_category_iso_8_1
8055           && category != coding_category_iso_8_2)
8056         CODING_ATTR_ASCII_COMPAT (attrs) = Qnil;
8057     }
8058   else if (EQ (coding_type, Qemacs_mule))
8059     {
8060       if (EQ (args[coding_arg_charset_list], Qemacs_mule))
8061         ASET (attrs, coding_attr_emacs_mule_full, Qt);
8062       CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
8063       category = coding_category_emacs_mule;
8064     }
8065   else if (EQ (coding_type, Qshift_jis))
8066     {
8067
8068       struct charset *charset;
8069
8070       if (XINT (Flength (charset_list)) != 3)
8071         error ("There should be just three charsets");
8072
8073       charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
8074       if (CHARSET_DIMENSION (charset) != 1)
8075         error ("Dimension of charset %s is not one",
8076                XSYMBOL (CHARSET_NAME (charset))->name->data);
8077       if (CHARSET_ASCII_COMPATIBLE_P (charset))
8078         CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
8079
8080       charset_list = XCDR (charset_list);
8081       charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
8082       if (CHARSET_DIMENSION (charset) != 1)
8083         error ("Dimension of charset %s is not one",
8084                XSYMBOL (CHARSET_NAME (charset))->name->data);
8085
8086       charset_list = XCDR (charset_list);
8087       charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
8088       if (CHARSET_DIMENSION (charset) != 2)
8089         error ("Dimension of charset %s is not two",
8090                XSYMBOL (CHARSET_NAME (charset))->name->data);
8091
8092       category = coding_category_sjis;
8093       Vsjis_coding_system = name;
8094     }
8095   else if (EQ (coding_type, Qbig5))
8096     {
8097       struct charset *charset;
8098
8099       if (XINT (Flength (charset_list)) != 2)
8100         error ("There should be just two charsets");
8101
8102       charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
8103       if (CHARSET_DIMENSION (charset) != 1)
8104         error ("Dimension of charset %s is not one",
8105                XSYMBOL (CHARSET_NAME (charset))->name->data);
8106       if (CHARSET_ASCII_COMPATIBLE_P (charset))
8107         CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
8108
8109       charset_list = XCDR (charset_list);
8110       charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
8111       if (CHARSET_DIMENSION (charset) != 2)
8112         error ("Dimension of charset %s is not two",
8113                XSYMBOL (CHARSET_NAME (charset))->name->data);
8114
8115       category = coding_category_big5;
8116       Vbig5_coding_system = name;
8117     }
8118   else if (EQ (coding_type, Qraw_text))
8119     {
8120       category = coding_category_raw_text;
8121       CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
8122     }
8123   else if (EQ (coding_type, Qutf_8))
8124     {
8125       category = coding_category_utf_8;
8126       CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
8127     }
8128   else if (EQ (coding_type, Qundecided))
8129     category = coding_category_undecided;
8130   else
8131     error ("Invalid coding system type: %s",
8132            XSYMBOL (coding_type)->name->data);
8133
8134   CODING_ATTR_CATEGORY (attrs) = make_number (category);
8135
8136   eol_type = args[coding_arg_eol_type];
8137   if (! NILP (eol_type)
8138       && ! EQ (eol_type, Qunix)
8139       && ! EQ (eol_type, Qdos)
8140       && ! EQ (eol_type, Qmac))
8141     error ("Invalid eol-type");
8142
8143   aliases = Fcons (name, Qnil);
8144
8145   if (NILP (eol_type))
8146     {
8147       eol_type = make_subsidiaries (name);
8148       for (i = 0; i < 3; i++)
8149         {
8150           Lisp_Object this_spec, this_name, this_aliases, this_eol_type;
8151
8152           this_name = AREF (eol_type, i);
8153           this_aliases = Fcons (this_name, Qnil);
8154           this_eol_type = (i == 0 ? Qunix : i == 1 ? Qdos : Qmac);
8155           this_spec = Fmake_vector (make_number (3), attrs);
8156           ASET (this_spec, 1, this_aliases);
8157           ASET (this_spec, 2, this_eol_type);
8158           Fputhash (this_name, this_spec, Vcoding_system_hash_table);
8159           Vcoding_system_list = Fcons (this_name, Vcoding_system_list);
8160           Vcoding_system_alist = Fcons (Fcons (Fsymbol_name (this_name), Qnil),
8161                                         Vcoding_system_alist);
8162         }
8163     }
8164
8165   spec_vec = Fmake_vector (make_number (3), attrs);
8166   ASET (spec_vec, 1, aliases);
8167   ASET (spec_vec, 2, eol_type);
8168
8169   Fputhash (name, spec_vec, Vcoding_system_hash_table);
8170   Vcoding_system_list = Fcons (name, Vcoding_system_list);
8171   Vcoding_system_alist = Fcons (Fcons (Fsymbol_name (name), Qnil),
8172                                 Vcoding_system_alist);
8173
8174   {
8175     int id = coding_categories[category].id;
8176
8177     if (id < 0 || EQ (name, CODING_ID_NAME (id)))
8178       setup_coding_system (name, &coding_categories[category]);
8179   }
8180
8181   return Qnil;
8182
8183  short_args:
8184   return Fsignal (Qwrong_number_of_arguments,
8185                   Fcons (intern ("define-coding-system-internal"),
8186                          make_number (nargs)));
8187 }
8188
8189 /* Fixme: should this record the alias relationships for
8190    diagnostics?  Should it update coding-system-list?  */
8191 DEFUN ("define-coding-system-alias", Fdefine_coding_system_alias,
8192        Sdefine_coding_system_alias, 2, 2, 0,
8193        doc: /* Define ALIAS as an alias for CODING-SYSTEM.  */)
8194      (alias, coding_system)
8195      Lisp_Object alias, coding_system;
8196 {
8197   Lisp_Object spec, aliases, eol_type;
8198
8199   CHECK_SYMBOL (alias);
8200   CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
8201   aliases = AREF (spec, 1);
8202   while (!NILP (XCDR (aliases)))
8203     aliases = XCDR (aliases);
8204   XCDR (aliases) = Fcons (alias, Qnil);
8205
8206   eol_type = AREF (spec, 2);
8207   if (VECTORP (eol_type))
8208     {
8209       Lisp_Object subsidiaries;
8210       int i;
8211
8212       subsidiaries = make_subsidiaries (alias);
8213       for (i = 0; i < 3; i++)
8214         Fdefine_coding_system_alias (AREF (subsidiaries, i),
8215                                      AREF (eol_type, i));
8216
8217       ASET (spec, 2, subsidiaries);
8218     }
8219
8220   Fputhash (alias, spec, Vcoding_system_hash_table);
8221   Vcoding_system_alist = Fcons (Fcons (Fsymbol_name (alias), Qnil),
8222                                 Vcoding_system_alist);
8223
8224   return Qnil;
8225 }
8226
8227 DEFUN ("coding-system-base", Fcoding_system_base, Scoding_system_base,
8228        1, 1, 0,
8229        doc: /* Return the base of CODING-SYSTEM.
8230 Any alias or subsidiary coding system is not a base coding system.  */)
8231   (coding_system)
8232      Lisp_Object coding_system;
8233 {
8234   Lisp_Object spec, attrs;
8235
8236   if (NILP (coding_system))
8237     return (Qno_conversion);
8238   CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
8239   attrs = AREF (spec, 0);
8240   return CODING_ATTR_BASE_NAME (attrs);
8241 }
8242
8243 DEFUN ("coding-system-plist", Fcoding_system_plist, Scoding_system_plist,
8244        1, 1, 0,
8245        doc: "Return the property list of CODING-SYSTEM.")
8246      (coding_system)
8247      Lisp_Object coding_system;
8248 {
8249   Lisp_Object spec, attrs;
8250
8251   if (NILP (coding_system))
8252     coding_system = Qno_conversion;
8253   CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
8254   attrs = AREF (spec, 0);
8255   return CODING_ATTR_PLIST (attrs);
8256 }
8257
8258
8259 DEFUN ("coding-system-aliases", Fcoding_system_aliases, Scoding_system_aliases,
8260        1, 1, 0,
8261        doc: /* Return the list of aliases of CODING-SYSTEM.  */)
8262      (coding_system)
8263      Lisp_Object coding_system;
8264 {
8265   Lisp_Object spec;
8266
8267   if (NILP (coding_system))
8268     coding_system = Qno_conversion;
8269   CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
8270   return AREF (spec, 1);
8271 }
8272
8273 DEFUN ("coding-system-eol-type", Fcoding_system_eol_type,
8274        Scoding_system_eol_type, 1, 1, 0,
8275        doc: /* Return eol-type of CODING-SYSTEM.
8276 An eol-type is integer 0, 1, 2, or a vector of coding systems.
8277
8278 Integer values 0, 1, and 2 indicate a format of end-of-line; LF, CRLF,
8279 and CR respectively.
8280
8281 A vector value indicates that a format of end-of-line should be
8282 detected automatically.  Nth element of the vector is the subsidiary
8283 coding system whose eol-type is N.  */)
8284      (coding_system)
8285      Lisp_Object coding_system;
8286 {
8287   Lisp_Object spec, eol_type;
8288   int n;
8289
8290   if (NILP (coding_system))
8291     coding_system = Qno_conversion;
8292   if (! CODING_SYSTEM_P (coding_system))
8293     return Qnil;
8294   spec = CODING_SYSTEM_SPEC (coding_system);
8295   eol_type = AREF (spec, 2);
8296   if (VECTORP (eol_type))
8297     return Fcopy_sequence (eol_type);
8298   n = EQ (eol_type, Qunix) ? 0 : EQ (eol_type, Qdos) ? 1 : 2;
8299   return make_number (n);
8300 }
8301
8302 #endif /* emacs */
8303
8304 \f
8305 /*** 9. Post-amble ***/
8306
8307 void
8308 init_coding_once ()
8309 {
8310   int i;
8311
8312   for (i = 0; i < coding_category_max; i++)
8313     {
8314       coding_categories[i].id = -1;
8315       coding_priorities[i] = i;
8316     }
8317
8318   /* ISO2022 specific initialize routine.  */
8319   for (i = 0; i < 0x20; i++)
8320     iso_code_class[i] = ISO_control_0;
8321   for (i = 0x21; i < 0x7F; i++)
8322     iso_code_class[i] = ISO_graphic_plane_0;
8323   for (i = 0x80; i < 0xA0; i++)
8324     iso_code_class[i] = ISO_control_1;
8325   for (i = 0xA1; i < 0xFF; i++)
8326     iso_code_class[i] = ISO_graphic_plane_1;
8327   iso_code_class[0x20] = iso_code_class[0x7F] = ISO_0x20_or_0x7F;
8328   iso_code_class[0xA0] = iso_code_class[0xFF] = ISO_0xA0_or_0xFF;
8329   iso_code_class[ISO_CODE_CR] = ISO_carriage_return;
8330   iso_code_class[ISO_CODE_SO] = ISO_shift_out;
8331   iso_code_class[ISO_CODE_SI] = ISO_shift_in;
8332   iso_code_class[ISO_CODE_SS2_7] = ISO_single_shift_2_7;
8333   iso_code_class[ISO_CODE_ESC] = ISO_escape;
8334   iso_code_class[ISO_CODE_SS2] = ISO_single_shift_2;
8335   iso_code_class[ISO_CODE_SS3] = ISO_single_shift_3;
8336   iso_code_class[ISO_CODE_CSI] = ISO_control_sequence_introducer;
8337
8338   inhibit_pre_post_conversion = 0;
8339
8340   for (i = 0; i < 256; i++)
8341     {
8342       emacs_mule_bytes[i] = 1;
8343     }
8344   emacs_mule_bytes[EMACS_MULE_LEADING_CODE_PRIVATE_11] = 3;
8345   emacs_mule_bytes[EMACS_MULE_LEADING_CODE_PRIVATE_12] = 3;
8346   emacs_mule_bytes[EMACS_MULE_LEADING_CODE_PRIVATE_21] = 4;
8347   emacs_mule_bytes[EMACS_MULE_LEADING_CODE_PRIVATE_22] = 4;
8348 }
8349
8350 #ifdef emacs
8351
8352 void
8353 syms_of_coding ()
8354 {
8355   staticpro (&Vcoding_system_hash_table);
8356   Vcoding_system_hash_table = Fmakehash (Qeq);
8357
8358   staticpro (&Vsjis_coding_system);
8359   Vsjis_coding_system = Qnil;
8360
8361   staticpro (&Vbig5_coding_system);
8362   Vbig5_coding_system = Qnil;
8363
8364   staticpro (&Vcode_conversion_work_buf_list);
8365   Vcode_conversion_work_buf_list = Qnil;
8366
8367   staticpro (&Vcode_conversion_reused_work_buf);
8368   Vcode_conversion_reused_work_buf = Qnil;
8369
8370   DEFSYM (Qcharset, "charset");
8371   DEFSYM (Qtarget_idx, "target-idx");
8372   DEFSYM (Qcoding_system_history, "coding-system-history");
8373   Fset (Qcoding_system_history, Qnil);
8374
8375   /* Target FILENAME is the first argument.  */
8376   Fput (Qinsert_file_contents, Qtarget_idx, make_number (0));
8377   /* Target FILENAME is the third argument.  */
8378   Fput (Qwrite_region, Qtarget_idx, make_number (2));
8379
8380   DEFSYM (Qcall_process, "call-process");
8381   /* Target PROGRAM is the first argument.  */
8382   Fput (Qcall_process, Qtarget_idx, make_number (0));
8383
8384   DEFSYM (Qcall_process_region, "call-process-region");
8385   /* Target PROGRAM is the third argument.  */
8386   Fput (Qcall_process_region, Qtarget_idx, make_number (2));
8387
8388   DEFSYM (Qstart_process, "start-process");
8389   /* Target PROGRAM is the third argument.  */
8390   Fput (Qstart_process, Qtarget_idx, make_number (2));
8391
8392   DEFSYM (Qopen_network_stream, "open-network-stream");
8393   /* Target SERVICE is the fourth argument.  */
8394   Fput (Qopen_network_stream, Qtarget_idx, make_number (3));
8395
8396   DEFSYM (Qcoding_system, "coding-system");
8397   DEFSYM (Qcoding_aliases, "coding-aliases");
8398
8399   DEFSYM (Qeol_type, "eol-type");
8400   DEFSYM (Qunix, "unix");
8401   DEFSYM (Qdos, "dos");
8402
8403   DEFSYM (Qbuffer_file_coding_system, "buffer-file-coding-system");
8404   DEFSYM (Qpost_read_conversion, "post-read-conversion");
8405   DEFSYM (Qpre_write_conversion, "pre-write-conversion");
8406   DEFSYM (Qdefault_char, "default-char");
8407   DEFSYM (Qundecided, "undecided");
8408   DEFSYM (Qno_conversion, "no-conversion");
8409   DEFSYM (Qraw_text, "raw-text");
8410
8411   DEFSYM (Qiso_2022, "iso-2022");
8412
8413   DEFSYM (Qutf_8, "utf-8");
8414
8415   DEFSYM (Qutf_16, "utf-16");
8416   DEFSYM (Qutf_16_be, "utf-16-be");
8417   DEFSYM (Qutf_16_be_nosig, "utf-16-be-nosig");
8418   DEFSYM (Qutf_16_le, "utf-16-l3");
8419   DEFSYM (Qutf_16_le_nosig, "utf-16-le-nosig");
8420   DEFSYM (Qsignature, "signature");
8421   DEFSYM (Qendian, "endian");
8422   DEFSYM (Qbig, "big");
8423   DEFSYM (Qlittle, "little");
8424
8425   DEFSYM (Qshift_jis, "shift-jis");
8426   DEFSYM (Qbig5, "big5");
8427
8428   DEFSYM (Qcoding_system_p, "coding-system-p");
8429
8430   DEFSYM (Qcoding_system_error, "coding-system-error");
8431   Fput (Qcoding_system_error, Qerror_conditions,
8432         Fcons (Qcoding_system_error, Fcons (Qerror, Qnil)));
8433   Fput (Qcoding_system_error, Qerror_message,
8434         build_string ("Invalid coding system"));
8435
8436   /* Intern this now in case it isn't already done.
8437      Setting this variable twice is harmless.
8438      But don't staticpro it here--that is done in alloc.c.  */
8439   Qchar_table_extra_slots = intern ("char-table-extra-slots");
8440
8441   DEFSYM (Qtranslation_table, "translation-table");
8442   Fput (Qtranslation_table, Qchar_table_extra_slots, make_number (1));
8443   DEFSYM (Qtranslation_table_id, "translation-table-id");
8444   DEFSYM (Qtranslation_table_for_decode, "translation-table-for-decode");
8445   DEFSYM (Qtranslation_table_for_encode, "translation-table-for-encode");
8446
8447   DEFSYM (Qvalid_codes, "valid-codes");
8448
8449   DEFSYM (Qemacs_mule, "emacs-mule");
8450
8451   Vcoding_category_table
8452     = Fmake_vector (make_number (coding_category_max), Qnil);
8453   staticpro (&Vcoding_category_table);
8454   /* Followings are target of code detection.  */
8455   ASET (Vcoding_category_table, coding_category_iso_7,
8456         intern ("coding-category-iso-7"));
8457   ASET (Vcoding_category_table, coding_category_iso_7_tight,
8458         intern ("coding-category-iso-7-tight"));
8459   ASET (Vcoding_category_table, coding_category_iso_8_1,
8460         intern ("coding-category-iso-8-1"));
8461   ASET (Vcoding_category_table, coding_category_iso_8_2,
8462         intern ("coding-category-iso-8-2"));
8463   ASET (Vcoding_category_table, coding_category_iso_7_else,
8464         intern ("coding-category-iso-7-else"));
8465   ASET (Vcoding_category_table, coding_category_iso_8_else,
8466         intern ("coding-category-iso-8-else"));
8467   ASET (Vcoding_category_table, coding_category_utf_8,
8468         intern ("coding-category-utf-8"));
8469   ASET (Vcoding_category_table, coding_category_utf_16_be,
8470         intern ("coding-category-utf-16-be"));
8471   ASET (Vcoding_category_table, coding_category_utf_16_le,
8472         intern ("coding-category-utf-16-le"));
8473   ASET (Vcoding_category_table, coding_category_utf_16_be_nosig,
8474         intern ("coding-category-utf-16-be-nosig"));
8475   ASET (Vcoding_category_table, coding_category_utf_16_le_nosig,
8476         intern ("coding-category-utf-16-le-nosig"));
8477   ASET (Vcoding_category_table, coding_category_charset,
8478         intern ("coding-category-charset"));
8479   ASET (Vcoding_category_table, coding_category_sjis,
8480         intern ("coding-category-sjis"));
8481   ASET (Vcoding_category_table, coding_category_big5,
8482         intern ("coding-category-big5"));
8483   ASET (Vcoding_category_table, coding_category_ccl,
8484         intern ("coding-category-ccl"));
8485   ASET (Vcoding_category_table, coding_category_emacs_mule,
8486         intern ("coding-category-emacs-mule"));
8487   /* Followings are NOT target of code detection.  */
8488   ASET (Vcoding_category_table, coding_category_raw_text,
8489         intern ("coding-category-raw-text"));
8490   ASET (Vcoding_category_table, coding_category_undecided,
8491         intern ("coding-category-undecided"));
8492
8493   defsubr (&Scoding_system_p);
8494   defsubr (&Sread_coding_system);
8495   defsubr (&Sread_non_nil_coding_system);
8496   defsubr (&Scheck_coding_system);
8497   defsubr (&Sdetect_coding_region);
8498   defsubr (&Sdetect_coding_string);
8499   defsubr (&Sfind_coding_systems_region_internal);
8500   defsubr (&Scheck_coding_systems_region);
8501   defsubr (&Sdecode_coding_region);
8502   defsubr (&Sencode_coding_region);
8503   defsubr (&Sdecode_coding_string);
8504   defsubr (&Sencode_coding_string);
8505   defsubr (&Sdecode_sjis_char);
8506   defsubr (&Sencode_sjis_char);
8507   defsubr (&Sdecode_big5_char);
8508   defsubr (&Sencode_big5_char);
8509   defsubr (&Sset_terminal_coding_system_internal);
8510   defsubr (&Sset_safe_terminal_coding_system_internal);
8511   defsubr (&Sterminal_coding_system);
8512   defsubr (&Sset_keyboard_coding_system_internal);
8513   defsubr (&Skeyboard_coding_system);
8514   defsubr (&Sfind_operation_coding_system);
8515   defsubr (&Sset_coding_system_priority);
8516   defsubr (&Sdefine_coding_system_internal);
8517   defsubr (&Sdefine_coding_system_alias);
8518   defsubr (&Scoding_system_base);
8519   defsubr (&Scoding_system_plist);
8520   defsubr (&Scoding_system_aliases);
8521   defsubr (&Scoding_system_eol_type);
8522   defsubr (&Scoding_system_priority_list);
8523
8524   DEFVAR_LISP ("coding-system-list", &Vcoding_system_list,
8525                doc: /* List of coding systems.
8526
8527 Do not alter the value of this variable manually.  This variable should be
8528 updated by the functions `define-coding-system' and
8529 `define-coding-system-alias'.  */);
8530   Vcoding_system_list = Qnil;
8531
8532   DEFVAR_LISP ("coding-system-alist", &Vcoding_system_alist,
8533                doc: /* Alist of coding system names.
8534 Each element is one element list of coding system name.
8535 This variable is given to `completing-read' as TABLE argument.
8536
8537 Do not alter the value of this variable manually.  This variable should be
8538 updated by the functions `make-coding-system' and
8539 `define-coding-system-alias'.  */);
8540   Vcoding_system_alist = Qnil;
8541
8542   DEFVAR_LISP ("coding-category-list", &Vcoding_category_list,
8543                doc: /* List of coding-categories (symbols) ordered by priority.
8544
8545 On detecting a coding system, Emacs tries code detection algorithms
8546 associated with each coding-category one by one in this order.  When
8547 one algorithm agrees with a byte sequence of source text, the coding
8548 system bound to the corresponding coding-category is selected.  */);
8549   {
8550     int i;
8551
8552     Vcoding_category_list = Qnil;
8553     for (i = coding_category_max - 1; i >= 0; i--)
8554       Vcoding_category_list
8555         = Fcons (XVECTOR (Vcoding_category_table)->contents[i],
8556                  Vcoding_category_list);
8557   }
8558
8559   DEFVAR_LISP ("coding-system-for-read", &Vcoding_system_for_read,
8560                doc: /* Specify the coding system for read operations.
8561 It is useful to bind this variable with `let', but do not set it globally.
8562 If the value is a coding system, it is used for decoding on read operation.
8563 If not, an appropriate element is used from one of the coding system alists:
8564 There are three such tables, `file-coding-system-alist',
8565 `process-coding-system-alist', and `network-coding-system-alist'.  */);
8566   Vcoding_system_for_read = Qnil;
8567
8568   DEFVAR_LISP ("coding-system-for-write", &Vcoding_system_for_write,
8569                doc: /* Specify the coding system for write operations.
8570 Programs bind this variable with `let', but you should not set it globally.
8571 If the value is a coding system, it is used for encoding of output,
8572 when writing it to a file and when sending it to a file or subprocess.
8573
8574 If this does not specify a coding system, an appropriate element
8575 is used from one of the coding system alists:
8576 There are three such tables, `file-coding-system-alist',
8577 `process-coding-system-alist', and `network-coding-system-alist'.
8578 For output to files, if the above procedure does not specify a coding system,
8579 the value of `buffer-file-coding-system' is used.  */);
8580   Vcoding_system_for_write = Qnil;
8581
8582   DEFVAR_LISP ("last-coding-system-used", &Vlast_coding_system_used,
8583                doc: /*
8584 Coding system used in the latest file or process I/O.  */);
8585   Vlast_coding_system_used = Qnil;
8586
8587   DEFVAR_BOOL ("inhibit-eol-conversion", &inhibit_eol_conversion,
8588                doc: /*
8589 *Non-nil means always inhibit code conversion of end-of-line format.
8590 See info node `Coding Systems' and info node `Text and Binary' concerning
8591 such conversion.  */);
8592   inhibit_eol_conversion = 0;
8593
8594   DEFVAR_BOOL ("inherit-process-coding-system", &inherit_process_coding_system,
8595                doc: /*
8596 Non-nil means process buffer inherits coding system of process output.
8597 Bind it to t if the process output is to be treated as if it were a file
8598 read from some filesystem.  */);
8599   inherit_process_coding_system = 0;
8600
8601   DEFVAR_LISP ("file-coding-system-alist", &Vfile_coding_system_alist,
8602                doc: /*
8603 Alist to decide a coding system to use for a file I/O operation.
8604 The format is ((PATTERN . VAL) ...),
8605 where PATTERN is a regular expression matching a file name,
8606 VAL is a coding system, a cons of coding systems, or a function symbol.
8607 If VAL is a coding system, it is used for both decoding and encoding
8608 the file contents.
8609 If VAL is a cons of coding systems, the car part is used for decoding,
8610 and the cdr part is used for encoding.
8611 If VAL is a function symbol, the function must return a coding system
8612 or a cons of coding systems which are used as above.  The function gets
8613 the arguments with which `find-operation-coding-systems' was called.
8614
8615 See also the function `find-operation-coding-system'
8616 and the variable `auto-coding-alist'.  */);
8617   Vfile_coding_system_alist = Qnil;
8618
8619   DEFVAR_LISP ("process-coding-system-alist", &Vprocess_coding_system_alist,
8620                doc: /*
8621 Alist to decide a coding system to use for a process I/O operation.
8622 The format is ((PATTERN . VAL) ...),
8623 where PATTERN is a regular expression matching a program name,
8624 VAL is a coding system, a cons of coding systems, or a function symbol.
8625 If VAL is a coding system, it is used for both decoding what received
8626 from the program and encoding what sent to the program.
8627 If VAL is a cons of coding systems, the car part is used for decoding,
8628 and the cdr part is used for encoding.
8629 If VAL is a function symbol, the function must return a coding system
8630 or a cons of coding systems which are used as above.
8631
8632 See also the function `find-operation-coding-system'.  */);
8633   Vprocess_coding_system_alist = Qnil;
8634
8635   DEFVAR_LISP ("network-coding-system-alist", &Vnetwork_coding_system_alist,
8636                doc: /*
8637 Alist to decide a coding system to use for a network I/O operation.
8638 The format is ((PATTERN . VAL) ...),
8639 where PATTERN is a regular expression matching a network service name
8640 or is a port number to connect to,
8641 VAL is a coding system, a cons of coding systems, or a function symbol.
8642 If VAL is a coding system, it is used for both decoding what received
8643 from the network stream and encoding what sent to the network stream.
8644 If VAL is a cons of coding systems, the car part is used for decoding,
8645 and the cdr part is used for encoding.
8646 If VAL is a function symbol, the function must return a coding system
8647 or a cons of coding systems which are used as above.
8648
8649 See also the function `find-operation-coding-system'.  */);
8650   Vnetwork_coding_system_alist = Qnil;
8651
8652   DEFVAR_LISP ("locale-coding-system", &Vlocale_coding_system,
8653                doc: /* Coding system to use with system messages.
8654 Also used for decoding keyboard input on X Window system.  */);
8655   Vlocale_coding_system = Qnil;
8656
8657   /* The eol mnemonics are reset in startup.el system-dependently.  */
8658   DEFVAR_LISP ("eol-mnemonic-unix", &eol_mnemonic_unix,
8659                doc: /*
8660 *String displayed in mode line for UNIX-like (LF) end-of-line format.  */);
8661   eol_mnemonic_unix = build_string (":");
8662
8663   DEFVAR_LISP ("eol-mnemonic-dos", &eol_mnemonic_dos,
8664                doc: /*
8665 *String displayed in mode line for DOS-like (CRLF) end-of-line format.  */);
8666   eol_mnemonic_dos = build_string ("\\");
8667
8668   DEFVAR_LISP ("eol-mnemonic-mac", &eol_mnemonic_mac,
8669                doc: /*
8670 *String displayed in mode line for MAC-like (CR) end-of-line format.  */);
8671   eol_mnemonic_mac = build_string ("/");
8672
8673   DEFVAR_LISP ("eol-mnemonic-undecided", &eol_mnemonic_undecided,
8674                doc: /*
8675 *String displayed in mode line when end-of-line format is not yet determined.  */);
8676   eol_mnemonic_undecided = build_string (":");
8677
8678   DEFVAR_LISP ("enable-character-translation", &Venable_character_translation,
8679                doc: /*
8680 *Non-nil enables character translation while encoding and decoding.  */);
8681   Venable_character_translation = Qt;
8682
8683   DEFVAR_LISP ("standard-translation-table-for-decode",
8684                &Vstandard_translation_table_for_decode,
8685                doc: /* Table for translating characters while decoding.  */);
8686   Vstandard_translation_table_for_decode = Qnil;
8687
8688   DEFVAR_LISP ("standard-translation-table-for-encode",
8689                &Vstandard_translation_table_for_encode,
8690                doc: /* Table for translating characters while encoding.  */);
8691   Vstandard_translation_table_for_encode = Qnil;
8692
8693   DEFVAR_LISP ("charset-revision-table", &Vcharset_revision_table,
8694                doc: /* Alist of charsets vs revision numbers.
8695 While encoding, if a charset (car part of an element) is found,
8696 designate it with the escape sequence identifying revision (cdr part
8697 of the element).  */);
8698   Vcharset_revision_table = Qnil;
8699
8700   DEFVAR_LISP ("default-process-coding-system",
8701                &Vdefault_process_coding_system,
8702                doc: /* Cons of coding systems used for process I/O by default.
8703 The car part is used for decoding a process output,
8704 the cdr part is used for encoding a text to be sent to a process.  */);
8705   Vdefault_process_coding_system = Qnil;
8706
8707   DEFVAR_LISP ("latin-extra-code-table", &Vlatin_extra_code_table,
8708                doc: /*
8709 Table of extra Latin codes in the range 128..159 (inclusive).
8710 This is a vector of length 256.
8711 If Nth element is non-nil, the existence of code N in a file
8712 \(or output of subprocess) doesn't prevent it to be detected as
8713 a coding system of ISO 2022 variant which has a flag
8714 `accept-latin-extra-code' t (e.g. iso-latin-1) on reading a file
8715 or reading output of a subprocess.
8716 Only 128th through 159th elements has a meaning.  */);
8717   Vlatin_extra_code_table = Fmake_vector (make_number (256), Qnil);
8718
8719   DEFVAR_LISP ("select-safe-coding-system-function",
8720                &Vselect_safe_coding_system_function,
8721                doc: /*
8722 Function to call to select safe coding system for encoding a text.
8723
8724 If set, this function is called to force a user to select a proper
8725 coding system which can encode the text in the case that a default
8726 coding system used in each operation can't encode the text.
8727
8728 The default value is `select-safe-coding-system' (which see).  */);
8729   Vselect_safe_coding_system_function = Qnil;
8730
8731   DEFVAR_BOOL ("inhibit-iso-escape-detection",
8732                &inhibit_iso_escape_detection,
8733                doc: /*
8734 If non-nil, Emacs ignores ISO2022's escape sequence on code detection.
8735
8736 By default, on reading a file, Emacs tries to detect how the text is
8737 encoded.  This code detection is sensitive to escape sequences.  If
8738 the sequence is valid as ISO2022, the code is determined as one of
8739 the ISO2022 encodings, and the file is decoded by the corresponding
8740 coding system (e.g. `iso-2022-7bit').
8741
8742 However, there may be a case that you want to read escape sequences in
8743 a file as is.  In such a case, you can set this variable to non-nil.
8744 Then, as the code detection ignores any escape sequences, no file is
8745 detected as encoded in some ISO2022 encoding.  The result is that all
8746 escape sequences become visible in a buffer.
8747
8748 The default value is nil, and it is strongly recommended not to change
8749 it.  That is because many Emacs Lisp source files that contain
8750 non-ASCII characters are encoded by the coding system `iso-2022-7bit'
8751 in Emacs's distribution, and they won't be decoded correctly on
8752 reading if you suppress escape sequence detection.
8753
8754 The other way to read escape sequences in a file without decoding is
8755 to explicitly specify some coding system that doesn't use ISO2022's
8756 escape sequence (e.g `latin-1') on reading by \\[universal-coding-system-argument].  */);
8757   inhibit_iso_escape_detection = 0;
8758
8759   {
8760     Lisp_Object args[coding_arg_max];
8761     Lisp_Object plist[14];
8762     int i;
8763
8764     for (i = 0; i < coding_arg_max; i++)
8765       args[i] = Qnil;
8766
8767     plist[0] = intern (":name");
8768     plist[1] = args[coding_arg_name] = Qno_conversion;
8769     plist[2] = intern (":mnemonic");
8770     plist[3] = args[coding_arg_mnemonic] = make_number ('=');
8771     plist[4] = intern (":coding-type");
8772     plist[5] = args[coding_arg_coding_type] = Qraw_text;
8773     plist[6] = intern (":ascii-compatible-p");
8774     plist[7] = args[coding_arg_ascii_compatible_p] = Qt;
8775     plist[8] = intern (":default-char");
8776     plist[9] = args[coding_arg_default_char] = make_number (0);
8777     plist[10] = intern (":docstring");
8778     plist[11] = build_string ("Do no conversion.\n\
8779 \n\
8780 When you visit a file with this coding, the file is read into a\n\
8781 unibyte buffer as is, thus each byte of a file is treated as a\n\
8782 character.");
8783     plist[12] = intern (":eol-type");
8784     plist[13] = args[coding_arg_eol_type] = Qunix;
8785     args[coding_arg_plist] = Flist (14, plist);
8786     Fdefine_coding_system_internal (coding_arg_max, args);
8787   }
8788
8789   setup_coding_system (Qno_conversion, &keyboard_coding);
8790   setup_coding_system (Qno_conversion, &terminal_coding);
8791   setup_coding_system (Qno_conversion, &safe_terminal_coding);
8792 }
8793
8794 char *
8795 emacs_strerror (error_number)
8796      int error_number;
8797 {
8798   char *str;
8799
8800   synchronize_system_messages_locale ();
8801   str = strerror (error_number);
8802
8803   if (! NILP (Vlocale_coding_system))
8804     {
8805       Lisp_Object dec = code_convert_string_norecord (build_string (str),
8806                                                       Vlocale_coding_system,
8807                                                       0);
8808       str = (char *) XSTRING (dec)->data;
8809     }
8810
8811   return str;
8812 }
8813
8814 #endif /* emacs */