src/coding.c

   1 /* Coding system handler (conversion, detection, etc).
   2    Copyright (C) 1995, 1997, 1998 Electrotechnical Laboratory, JAPAN.
   3      Licensed to the Free Software Foundation.
   4    Copyright (C) 2001, 2002 Free Software Foundation, Inc.
   5    Copyright (C) 2003
   6      National Institute of Advanced Industrial Science and Technology (AIST)
   7      Registration Number H13PRO009
   8
   9 This file is part of GNU Emacs.
  10
  11 GNU Emacs is free software; you can redistribute it and/or modify
  12 it under the terms of the GNU General Public License as published by
  13 the Free Software Foundation; either version 2, or (at your option)
  14 any later version.
  15
  16 GNU Emacs is distributed in the hope that it will be useful,
  17 but WITHOUT ANY WARRANTY; without even the implied warranty of
  18 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  19 GNU General Public License for more details.
  20
  21 You should have received a copy of the GNU General Public License
  22 along with GNU Emacs; see the file COPYING.  If not, write to
  23 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
  24 Boston, MA 02111-1307, USA.  */
  25
  26 /*** TABLE OF CONTENTS ***
  27
  28   0. General comments
  29   1. Preamble
  30   2. Emacs' internal format (emacs-utf-8) handlers
  31   3. UTF-8 handlers
  32   4. UTF-16 handlers
  33   5. Charset-base coding systems handlers
  34   6. emacs-mule (old Emacs' internal format) handlers
  35   7. ISO2022 handlers
  36   8. Shift-JIS and BIG5 handlers
  37   9. CCL handlers
  38   10. C library functions
  39   11. Emacs Lisp library functions
  40   12. Postamble
  41
  42 */
  43
  44 /*** 0. General comments ***
  45
  46
  47 CODING SYSTEM
  48
  49   A coding system is an object for an encoding mechanism that contains
  50   information about how to convert byte sequences to character
  51   sequences and vice versa.  When we say "decode", it means converting
  52   a byte sequence of a specific coding system into a character
  53   sequence that is represented by Emacs' internal coding system
  54   `emacs-utf-8', and when we say "encode", it means converting a
  55   character sequence of emacs-utf-8 to a byte sequence of a specific
  56   coding system.
  57
  58   In Emacs Lisp, a coding system is represented by a Lisp symbol.  In
  59   C level, a coding system is represented by a vector of attributes
  60   stored in the hash table Vcharset_hash_table.  The conversion from
  61   coding system symbol to attributes vector is done by looking up
  62   Vcharset_hash_table by the symbol.
  63
  64   Coding systems are classified into the following types depending on
  65   the encoding mechanism.  Here's a brief description of the types.
  66
  67   o UTF-8
  68
  69   o UTF-16
  70
  71   o Charset-base coding system
  72
  73   A coding system defined by one or more (coded) character sets.
  74   Decoding and encoding are done by a code converter defined for each
  75   character set.
  76
  77   o Old Emacs internal format (emacs-mule)
  78
  79   The coding system adopted by old versions of Emacs (20 and 21).
  80
  81   o ISO2022-base coding system
  82
  83   The most famous coding system for multiple character sets.  X's
  84   Compound Text, various EUCs (Extended Unix Code), and coding systems
  85   used in the Internet communication such as ISO-2022-JP are all
  86   variants of ISO2022.
  87
  88   o SJIS (or Shift-JIS or MS-Kanji-Code)
  89
  90   A coding system to encode character sets: ASCII, JISX0201, and
  91   JISX0208.  Widely used for PC's in Japan.  Details are described in
  92   section 8.
  93
  94   o BIG5
  95
  96   A coding system to encode character sets: ASCII and Big5.  Widely
  97   used for Chinese (mainly in Taiwan and Hong Kong).  Details are
  98   described in section 8.  In this file, when we write "big5" (all
  99   lowercase), we mean the coding system, and when we write "Big5"
 100   (capitalized), we mean the character set.
 101
 102   o CCL
 103
 104   If a user wants to decode/encode text encoded in a coding system
 105   not listed above, he can supply a decoder and an encoder for it in
 106   CCL (Code Conversion Language) programs.  Emacs executes the CCL
 107   program while decoding/encoding.
 108
 109   o Raw-text
 110
 111   A coding system for text containing raw eight-bit data.  Emacs
 112   treats each byte of source text as a character (except for
 113   end-of-line conversion).
 114
 115   o No-conversion
 116
 117   Like raw text, but don't do end-of-line conversion.
 118
 119
 120 END-OF-LINE FORMAT
 121
 122   How text end-of-line is encoded depends on operating system.  For
 123   instance, Unix's format is just one byte of LF (line-feed) code,
 124   whereas DOS's format is two-byte sequence of `carriage-return' and
 125   `line-feed' codes.  MacOS's format is usually one byte of
 126   `carriage-return'.
 127
 128   Since text character encoding and end-of-line encoding are
 129   independent, any coding system described above can take any format
 130   of end-of-line (except for no-conversion).
 131
 132 STRUCT CODING_SYSTEM
 133
 134   Before using a coding system for code conversion (i.e. decoding and
 135   encoding), we setup a structure of type `struct coding_system'.
 136   This structure keeps various information about a specific code
 137   conversion (e.g. the location of source and destination data).
 138
 139 */
 140
 141 /* COMMON MACROS */
 142
 143
 144 /*** GENERAL NOTES on `detect_coding_XXX ()' functions ***
 145
 146   These functions check if a byte sequence specified as a source in
 147   CODING conforms to the format of XXX, and update the members of
 148   DETECT_INFO.
 149
 150   Return 1 if the byte sequence conforms to XXX, otherwise return 0.
 151
 152   Below is the template of these functions.  */
 153
 154 #if 0
 155 static int
 156 detect_coding_XXX (coding, detect_info)
 157      struct coding_system *coding;
 158      struct coding_detection_info *detect_info;
 159 {
 160   unsigned char *src = coding->source;
 161   unsigned char *src_end = coding->source + coding->src_bytes;
 162   int multibytep = coding->src_multibyte;
 163   int consumed_chars = 0;
 164   int found = 0;
 165   ...;
 166
 167   while (1)
 168     {
 169       /* Get one byte from the source.  If the souce is exausted, jump
 170          to no_more_source:.  */
 171       ONE_MORE_BYTE (c);
 172
 173       if (! __C_conforms_to_XXX___ (c))
 174         break;
 175       if (! __C_strongly_suggests_XXX__ (c))
 176         found = CATEGORY_MASK_XXX;
 177     }
 178   /* The byte sequence is invalid for XXX.  */
 179   detect_info->rejected |= CATEGORY_MASK_XXX;
 180   return 0;
 181
 182  no_more_source:
 183   /* The source exausted successfully.  */
 184   detect_info->found |= found;
 185   return 1;
 186 }
 187 #endif
 188
 189 /*** GENERAL NOTES on `decode_coding_XXX ()' functions ***
 190
 191   These functions decode a byte sequence specified as a source by
 192   CODING.  The resulting multibyte text goes to a place pointed to by
 193   CODING->charbuf, the length of which should not exceed
 194   CODING->charbuf_size;
 195
 196   These functions set the information of original and decoded texts in
 197   CODING->consumed, CODING->consumed_char, and CODING->charbuf_used.
 198   They also set CODING->result to one of CODING_RESULT_XXX indicating
 199   how the decoding is finished.
 200
 201   Below is the template of these functions.  */
 202
 203 #if 0
 204 static void
 205 decode_coding_XXXX (coding)
 206      struct coding_system *coding;
 207 {
 208   unsigned char *src = coding->source + coding->consumed;
 209   unsigned char *src_end = coding->source + coding->src_bytes;
 210   /* SRC_BASE remembers the start position in source in each loop.
 211      The loop will be exited when there's not enough source code, or
 212      when there's no room in CHARBUF for a decoded character.  */
 213   unsigned char *src_base;
 214   /* A buffer to produce decoded characters.  */
 215   int *charbuf = coding->charbuf + coding->charbuf_used;
 216   int *charbuf_end = coding->charbuf + coding->charbuf_size;
 217   int multibytep = coding->src_multibyte;
 218
 219   while (1)
 220     {
 221       src_base = src;
 222       if (charbuf < charbuf_end)
 223         /* No more room to produce a decoded character.  */
 224         break;
 225       ONE_MORE_BYTE (c);
 226       /* Decode it. */
 227     }
 228
 229  no_more_source:
 230   if (src_base < src_end
 231       && coding->mode & CODING_MODE_LAST_BLOCK)
 232     /* If the source ends by partial bytes to construct a character,
 233        treat them as eight-bit raw data.  */
 234     while (src_base < src_end && charbuf < charbuf_end)
 235       *charbuf++ = *src_base++;
 236   /* Remember how many bytes and characters we consumed.  If the
 237      source is multibyte, the bytes and chars are not identical.  */
 238   coding->consumed = coding->consumed_char = src_base - coding->source;
 239   /* Remember how many characters we produced.  */
 240   coding->charbuf_used = charbuf - coding->charbuf;
 241 }
 242 #endif
 243
 244 /*** GENERAL NOTES on `encode_coding_XXX ()' functions ***
 245
 246   These functions encode SRC_BYTES length text at SOURCE of Emacs'
 247   internal multibyte format by CODING.  The resulting byte sequence
 248   goes to a place pointed to by DESTINATION, the length of which
 249   should not exceed DST_BYTES.
 250
 251   These functions set the information of original and encoded texts in
 252   the members produced, produced_char, consumed, and consumed_char of
 253   the structure *CODING.  They also set the member result to one of
 254   CODING_RESULT_XXX indicating how the encoding finished.
 255
 256   DST_BYTES zero means that source area and destination area are
 257   overlapped, which means that we can produce a encoded text until it
 258   reaches at the head of not-yet-encoded source text.
 259
 260   Below is a template of these functions.  */
 261 #if 0
 262 static void
 263 encode_coding_XXX (coding)
 264      struct coding_system *coding;
 265 {
 266   int multibytep = coding->dst_multibyte;
 267   int *charbuf = coding->charbuf;
 268   int *charbuf_end = charbuf->charbuf + coding->charbuf_used;
 269   unsigned char *dst = coding->destination + coding->produced;
 270   unsigned char *dst_end = coding->destination + coding->dst_bytes;
 271   unsigned char *adjusted_dst_end = dst_end - _MAX_BYTES_PRODUCED_IN_LOOP_;
 272   int produced_chars = 0;
 273
 274   for (; charbuf < charbuf_end && dst < adjusted_dst_end; charbuf++)
 275     {
 276       int c = *charbuf;
 277       /* Encode C into DST, and increment DST.  */
 278     }
 279  label_no_more_destination:
 280   /* How many chars and bytes we produced.  */
 281   coding->produced_char += produced_chars;
 282   coding->produced = dst - coding->destination;
 283 }
 284 #endif
 285
 286 \f
 287 /*** 1. Preamble ***/
 288
 289 #include <config.h>
 290 #include <stdio.h>
 291
 292 #include "lisp.h"
 293 #include "buffer.h"
 294 #include "character.h"
 295 #include "charset.h"
 296 #include "ccl.h"
 297 #include "composite.h"
 298 #include "coding.h"
 299 #include "window.h"
 300
 301 Lisp_Object Vcoding_system_hash_table;
 302
 303 Lisp_Object Qcoding_system, Qcoding_aliases, Qeol_type;
 304 Lisp_Object Qunix, Qdos;
 305 extern Lisp_Object Qmac;        /* frame.c */
 306 Lisp_Object Qbuffer_file_coding_system;
 307 Lisp_Object Qpost_read_conversion, Qpre_write_conversion;
 308 Lisp_Object Qdefault_char;
 309 Lisp_Object Qno_conversion, Qundecided;
 310 Lisp_Object Qcharset, Qiso_2022, Qutf_8, Qutf_16, Qshift_jis, Qbig5;
 311 Lisp_Object Qbig, Qlittle;
 312 Lisp_Object Qcoding_system_history;
 313 Lisp_Object Qvalid_codes;
 314 Lisp_Object QCcategory, QCmnemonic, QCdefalut_char;
 315 Lisp_Object QCdecode_translation_table, QCencode_translation_table;
 316 Lisp_Object QCpost_read_conversion, QCpre_write_conversion;
 317
 318 extern Lisp_Object Qinsert_file_contents, Qwrite_region;
 319 Lisp_Object Qcall_process, Qcall_process_region, Qprocess_argument;
 320 Lisp_Object Qstart_process, Qopen_network_stream;
 321 Lisp_Object Qtarget_idx;
 322
 323 Lisp_Object Qinsufficient_source, Qinconsistent_eol, Qinvalid_source;
 324 Lisp_Object Qinterrupted, Qinsufficient_memory;
 325
 326 int coding_system_require_warning;
 327
 328 Lisp_Object Vselect_safe_coding_system_function;
 329
 330 /* Mnemonic string for each format of end-of-line.  */
 331 Lisp_Object eol_mnemonic_unix, eol_mnemonic_dos, eol_mnemonic_mac;
 332 /* Mnemonic string to indicate format of end-of-line is not yet
 333    decided.  */
 334 Lisp_Object eol_mnemonic_undecided;
 335
 336 #ifdef emacs
 337
 338 Lisp_Object Vcoding_system_list, Vcoding_system_alist;
 339
 340 Lisp_Object Qcoding_system_p, Qcoding_system_error;
 341
 342 /* Coding system emacs-mule and raw-text are for converting only
 343    end-of-line format.  */
 344 Lisp_Object Qemacs_mule, Qraw_text;
 345 Lisp_Object Qutf_8_emacs;
 346
 347 /* Coding-systems are handed between Emacs Lisp programs and C internal
 348    routines by the following three variables.  */
 349 /* Coding-system for reading files and receiving data from process.  */
 350 Lisp_Object Vcoding_system_for_read;
 351 /* Coding-system for writing files and sending data to process.  */
 352 Lisp_Object Vcoding_system_for_write;
 353 /* Coding-system actually used in the latest I/O.  */
 354 Lisp_Object Vlast_coding_system_used;
 355 /* Set to non-nil when an error is detected while code conversion.  */
 356 Lisp_Object Vlast_code_conversion_error;
 357 /* A vector of length 256 which contains information about special
 358    Latin codes (especially for dealing with Microsoft codes).  */
 359 Lisp_Object Vlatin_extra_code_table;
 360
 361 /* Flag to inhibit code conversion of end-of-line format.  */
 362 int inhibit_eol_conversion;
 363
 364 /* Flag to inhibit ISO2022 escape sequence detection.  */
 365 int inhibit_iso_escape_detection;
 366
 367 /* Flag to make buffer-file-coding-system inherit from process-coding.  */
 368 int inherit_process_coding_system;
 369
 370 /* Coding system to be used to encode text for terminal display.  */
 371 struct coding_system terminal_coding;
 372
 373 /* Coding system to be used to encode text for terminal display when
 374    terminal coding system is nil.  */
 375 struct coding_system safe_terminal_coding;
 376
 377 /* Coding system of what is sent from terminal keyboard.  */
 378 struct coding_system keyboard_coding;
 379
 380 Lisp_Object Vfile_coding_system_alist;
 381 Lisp_Object Vprocess_coding_system_alist;
 382 Lisp_Object Vnetwork_coding_system_alist;
 383
 384 Lisp_Object Vlocale_coding_system;
 385
 386 #endif /* emacs */
 387
 388 /* Flag to tell if we look up translation table on character code
 389    conversion.  */
 390 Lisp_Object Venable_character_translation;
 391 /* Standard translation table to look up on decoding (reading).  */
 392 Lisp_Object Vstandard_translation_table_for_decode;
 393 /* Standard translation table to look up on encoding (writing).  */
 394 Lisp_Object Vstandard_translation_table_for_encode;
 395
 396 Lisp_Object Qtranslation_table;
 397 Lisp_Object Qtranslation_table_id;
 398 Lisp_Object Qtranslation_table_for_decode;
 399 Lisp_Object Qtranslation_table_for_encode;
 400
 401 /* Alist of charsets vs revision number.  */
 402 static Lisp_Object Vcharset_revision_table;
 403
 404 /* Default coding systems used for process I/O.  */
 405 Lisp_Object Vdefault_process_coding_system;
 406
 407 /* Char table for translating Quail and self-inserting input.  */
 408 Lisp_Object Vtranslation_table_for_input;
 409
 410 /* Two special coding systems.  */
 411 Lisp_Object Vsjis_coding_system;
 412 Lisp_Object Vbig5_coding_system;
 413
 414 static void record_conversion_result (struct coding_system *coding,
 415                                       enum coding_result_code result);
 416 static int detect_coding_utf_8 P_ ((struct coding_system *,
 417                                     struct coding_detection_info *info));
 418 static void decode_coding_utf_8 P_ ((struct coding_system *));
 419 static int encode_coding_utf_8 P_ ((struct coding_system *));
 420
 421 static int detect_coding_utf_16 P_ ((struct coding_system *,
 422                                      struct coding_detection_info *info));
 423 static void decode_coding_utf_16 P_ ((struct coding_system *));
 424 static int encode_coding_utf_16 P_ ((struct coding_system *));
 425
 426 static int detect_coding_iso_2022 P_ ((struct coding_system *,
 427                                        struct coding_detection_info *info));
 428 static void decode_coding_iso_2022 P_ ((struct coding_system *));
 429 static int encode_coding_iso_2022 P_ ((struct coding_system *));
 430
 431 static int detect_coding_emacs_mule P_ ((struct coding_system *,
 432                                          struct coding_detection_info *info));
 433 static void decode_coding_emacs_mule P_ ((struct coding_system *));
 434 static int encode_coding_emacs_mule P_ ((struct coding_system *));
 435
 436 static int detect_coding_sjis P_ ((struct coding_system *,
 437                                    struct coding_detection_info *info));
 438 static void decode_coding_sjis P_ ((struct coding_system *));
 439 static int encode_coding_sjis P_ ((struct coding_system *));
 440
 441 static int detect_coding_big5 P_ ((struct coding_system *,
 442                                    struct coding_detection_info *info));
 443 static void decode_coding_big5 P_ ((struct coding_system *));
 444 static int encode_coding_big5 P_ ((struct coding_system *));
 445
 446 static int detect_coding_ccl P_ ((struct coding_system *,
 447                                   struct coding_detection_info *info));
 448 static void decode_coding_ccl P_ ((struct coding_system *));
 449 static int encode_coding_ccl P_ ((struct coding_system *));
 450
 451 static void decode_coding_raw_text P_ ((struct coding_system *));
 452 static int encode_coding_raw_text P_ ((struct coding_system *));
 453
 454
 455 /* ISO2022 section */
 456
 457 #define CODING_ISO_INITIAL(coding, reg)                 \
 458   (XINT (AREF (AREF (CODING_ID_ATTRS ((coding)->id),    \
 459                      coding_attr_iso_initial),          \
 460                reg)))
 461
 462
 463 #define CODING_ISO_REQUEST(coding, charset_id)  \
 464   ((charset_id <= (coding)->max_charset_id      \
 465     ? (coding)->safe_charsets[charset_id]       \
 466     : -1))
 467
 468
 469 #define CODING_ISO_FLAGS(coding)        \
 470   ((coding)->spec.iso_2022.flags)
 471 #define CODING_ISO_DESIGNATION(coding, reg)     \
 472   ((coding)->spec.iso_2022.current_designation[reg])
 473 #define CODING_ISO_INVOCATION(coding, plane)    \
 474   ((coding)->spec.iso_2022.current_invocation[plane])
 475 #define CODING_ISO_SINGLE_SHIFTING(coding)      \
 476   ((coding)->spec.iso_2022.single_shifting)
 477 #define CODING_ISO_BOL(coding)  \
 478   ((coding)->spec.iso_2022.bol)
 479 #define CODING_ISO_INVOKED_CHARSET(coding, plane)       \
 480   CODING_ISO_DESIGNATION ((coding), CODING_ISO_INVOCATION ((coding), (plane)))
 481
 482 /* Control characters of ISO2022.  */
 483                         /* code */      /* function */
 484 #define ISO_CODE_LF     0x0A            /* line-feed */
 485 #define ISO_CODE_CR     0x0D            /* carriage-return */
 486 #define ISO_CODE_SO     0x0E            /* shift-out */
 487 #define ISO_CODE_SI     0x0F            /* shift-in */
 488 #define ISO_CODE_SS2_7  0x19            /* single-shift-2 for 7-bit code */
 489 #define ISO_CODE_ESC    0x1B            /* escape */
 490 #define ISO_CODE_SS2    0x8E            /* single-shift-2 */
 491 #define ISO_CODE_SS3    0x8F            /* single-shift-3 */
 492 #define ISO_CODE_CSI    0x9B            /* control-sequence-introducer */
 493
 494 /* All code (1-byte) of ISO2022 is classified into one of the
 495    followings.  */
 496 enum iso_code_class_type
 497   {
 498     ISO_control_0,              /* Control codes in the range
 499                                    0x00..0x1F and 0x7F, except for the
 500                                    following 5 codes.  */
 501     ISO_shift_out,              /* ISO_CODE_SO (0x0E) */
 502     ISO_shift_in,               /* ISO_CODE_SI (0x0F) */
 503     ISO_single_shift_2_7,       /* ISO_CODE_SS2_7 (0x19) */
 504     ISO_escape,                 /* ISO_CODE_SO (0x1B) */
 505     ISO_control_1,              /* Control codes in the range
 506                                    0x80..0x9F, except for the
 507                                    following 3 codes.  */
 508     ISO_single_shift_2,         /* ISO_CODE_SS2 (0x8E) */
 509     ISO_single_shift_3,         /* ISO_CODE_SS3 (0x8F) */
 510     ISO_control_sequence_introducer, /* ISO_CODE_CSI (0x9B) */
 511     ISO_0x20_or_0x7F,           /* Codes of the values 0x20 or 0x7F.  */
 512     ISO_graphic_plane_0,        /* Graphic codes in the range 0x21..0x7E.  */
 513     ISO_0xA0_or_0xFF,           /* Codes of the values 0xA0 or 0xFF.  */
 514     ISO_graphic_plane_1         /* Graphic codes in the range 0xA1..0xFE.  */
 515   };
 516
 517 /** The macros CODING_ISO_FLAG_XXX defines a flag bit of the
 518     `iso-flags' attribute of an iso2022 coding system.  */
 519
 520 /* If set, produce long-form designation sequence (e.g. ESC $ ( A)
 521    instead of the correct short-form sequence (e.g. ESC $ A).  */
 522 #define CODING_ISO_FLAG_LONG_FORM       0x0001
 523
 524 /* If set, reset graphic planes and registers at end-of-line to the
 525    initial state.  */
 526 #define CODING_ISO_FLAG_RESET_AT_EOL    0x0002
 527
 528 /* If set, reset graphic planes and registers before any control
 529    characters to the initial state.  */
 530 #define CODING_ISO_FLAG_RESET_AT_CNTL   0x0004
 531
 532 /* If set, encode by 7-bit environment.  */
 533 #define CODING_ISO_FLAG_SEVEN_BITS      0x0008
 534
 535 /* If set, use locking-shift function.  */
 536 #define CODING_ISO_FLAG_LOCKING_SHIFT   0x0010
 537
 538 /* If set, use single-shift function.  Overwrite
 539    CODING_ISO_FLAG_LOCKING_SHIFT.  */
 540 #define CODING_ISO_FLAG_SINGLE_SHIFT    0x0020
 541
 542 /* If set, use designation escape sequence.  */
 543 #define CODING_ISO_FLAG_DESIGNATION     0x0040
 544
 545 /* If set, produce revision number sequence.  */
 546 #define CODING_ISO_FLAG_REVISION        0x0080
 547
 548 /* If set, produce ISO6429's direction specifying sequence.  */
 549 #define CODING_ISO_FLAG_DIRECTION       0x0100
 550
 551 /* If set, assume designation states are reset at beginning of line on
 552    output.  */
 553 #define CODING_ISO_FLAG_INIT_AT_BOL     0x0200
 554
 555 /* If set, designation sequence should be placed at beginning of line
 556    on output.  */
 557 #define CODING_ISO_FLAG_DESIGNATE_AT_BOL 0x0400
 558
 559 /* If set, do not encode unsafe charactes on output.  */
 560 #define CODING_ISO_FLAG_SAFE            0x0800
 561
 562 /* If set, extra latin codes (128..159) are accepted as a valid code
 563    on input.  */
 564 #define CODING_ISO_FLAG_LATIN_EXTRA     0x1000
 565
 566 #define CODING_ISO_FLAG_COMPOSITION     0x2000
 567
 568 #define CODING_ISO_FLAG_EUC_TW_SHIFT    0x4000
 569
 570 #define CODING_ISO_FLAG_USE_ROMAN       0x8000
 571
 572 #define CODING_ISO_FLAG_USE_OLDJIS      0x10000
 573
 574 #define CODING_ISO_FLAG_FULL_SUPPORT    0x100000
 575
 576 /* A character to be produced on output if encoding of the original
 577    character is prohibited by CODING_ISO_FLAG_SAFE.  */
 578 #define CODING_INHIBIT_CHARACTER_SUBSTITUTION  '?'
 579
 580
 581 /* UTF-16 section */
 582 #define CODING_UTF_16_BOM(coding)       \
 583   ((coding)->spec.utf_16.bom)
 584
 585 #define CODING_UTF_16_ENDIAN(coding)    \
 586   ((coding)->spec.utf_16.endian)
 587
 588 #define CODING_UTF_16_SURROGATE(coding) \
 589   ((coding)->spec.utf_16.surrogate)
 590
 591
 592 /* CCL section */
 593 #define CODING_CCL_DECODER(coding)      \
 594   AREF (CODING_ID_ATTRS ((coding)->id), coding_attr_ccl_decoder)
 595 #define CODING_CCL_ENCODER(coding)      \
 596   AREF (CODING_ID_ATTRS ((coding)->id), coding_attr_ccl_encoder)
 597 #define CODING_CCL_VALIDS(coding)                                          \
 598   (SDATA (AREF (CODING_ID_ATTRS ((coding)->id), coding_attr_ccl_valids)))
 599
 600 /* Index for each coding category in `coding_categories' */
 601
 602 enum coding_category
 603   {
 604     coding_category_iso_7,
 605     coding_category_iso_7_tight,
 606     coding_category_iso_8_1,
 607     coding_category_iso_8_2,
 608     coding_category_iso_7_else,
 609     coding_category_iso_8_else,
 610     coding_category_utf_8,
 611     coding_category_utf_16_auto,
 612     coding_category_utf_16_be,
 613     coding_category_utf_16_le,
 614     coding_category_utf_16_be_nosig,
 615     coding_category_utf_16_le_nosig,
 616     coding_category_charset,
 617     coding_category_sjis,
 618     coding_category_big5,
 619     coding_category_ccl,
 620     coding_category_emacs_mule,
 621     /* All above are targets of code detection.  */
 622     coding_category_raw_text,
 623     coding_category_undecided,
 624     coding_category_max
 625   };
 626
 627 /* Definitions of flag bits used in detect_coding_XXXX.  */
 628 #define CATEGORY_MASK_ISO_7             (1 << coding_category_iso_7)
 629 #define CATEGORY_MASK_ISO_7_TIGHT       (1 << coding_category_iso_7_tight)
 630 #define CATEGORY_MASK_ISO_8_1           (1 << coding_category_iso_8_1)
 631 #define CATEGORY_MASK_ISO_8_2           (1 << coding_category_iso_8_2)
 632 #define CATEGORY_MASK_ISO_7_ELSE        (1 << coding_category_iso_7_else)
 633 #define CATEGORY_MASK_ISO_8_ELSE        (1 << coding_category_iso_8_else)
 634 #define CATEGORY_MASK_UTF_8             (1 << coding_category_utf_8)
 635 #define CATEGORY_MASK_UTF_16_AUTO       (1 << coding_category_utf_16_auto)
 636 #define CATEGORY_MASK_UTF_16_BE         (1 << coding_category_utf_16_be)
 637 #define CATEGORY_MASK_UTF_16_LE         (1 << coding_category_utf_16_le)
 638 #define CATEGORY_MASK_UTF_16_BE_NOSIG   (1 << coding_category_utf_16_be_nosig)
 639 #define CATEGORY_MASK_UTF_16_LE_NOSIG   (1 << coding_category_utf_16_le_nosig)
 640 #define CATEGORY_MASK_CHARSET           (1 << coding_category_charset)
 641 #define CATEGORY_MASK_SJIS              (1 << coding_category_sjis)
 642 #define CATEGORY_MASK_BIG5              (1 << coding_category_big5)
 643 #define CATEGORY_MASK_CCL               (1 << coding_category_ccl)
 644 #define CATEGORY_MASK_EMACS_MULE        (1 << coding_category_emacs_mule)
 645 #define CATEGORY_MASK_RAW_TEXT          (1 << coding_category_raw_text)
 646
 647 /* This value is returned if detect_coding_mask () find nothing other
 648    than ASCII characters.  */
 649 #define CATEGORY_MASK_ANY               \
 650   (CATEGORY_MASK_ISO_7                  \
 651    | CATEGORY_MASK_ISO_7_TIGHT          \
 652    | CATEGORY_MASK_ISO_8_1              \
 653    | CATEGORY_MASK_ISO_8_2              \
 654    | CATEGORY_MASK_ISO_7_ELSE           \
 655    | CATEGORY_MASK_ISO_8_ELSE           \
 656    | CATEGORY_MASK_UTF_8                \
 657    | CATEGORY_MASK_UTF_16_BE            \
 658    | CATEGORY_MASK_UTF_16_LE            \
 659    | CATEGORY_MASK_UTF_16_BE_NOSIG      \
 660    | CATEGORY_MASK_UTF_16_LE_NOSIG      \
 661    | CATEGORY_MASK_CHARSET              \
 662    | CATEGORY_MASK_SJIS                 \
 663    | CATEGORY_MASK_BIG5                 \
 664    | CATEGORY_MASK_CCL                  \
 665    | CATEGORY_MASK_EMACS_MULE)
 666
 667
 668 #define CATEGORY_MASK_ISO_7BIT \
 669   (CATEGORY_MASK_ISO_7 | CATEGORY_MASK_ISO_7_TIGHT)
 670
 671 #define CATEGORY_MASK_ISO_8BIT \
 672   (CATEGORY_MASK_ISO_8_1 | CATEGORY_MASK_ISO_8_2)
 673
 674 #define CATEGORY_MASK_ISO_ELSE \
 675   (CATEGORY_MASK_ISO_7_ELSE | CATEGORY_MASK_ISO_8_ELSE)
 676
 677 #define CATEGORY_MASK_ISO_ESCAPE        \
 678   (CATEGORY_MASK_ISO_7                  \
 679    | CATEGORY_MASK_ISO_7_TIGHT          \
 680    | CATEGORY_MASK_ISO_7_ELSE           \
 681    | CATEGORY_MASK_ISO_8_ELSE)
 682
 683 #define CATEGORY_MASK_ISO       \
 684   (  CATEGORY_MASK_ISO_7BIT     \
 685      | CATEGORY_MASK_ISO_8BIT   \
 686      | CATEGORY_MASK_ISO_ELSE)
 687
 688 #define CATEGORY_MASK_UTF_16            \
 689   (CATEGORY_MASK_UTF_16_BE              \
 690    | CATEGORY_MASK_UTF_16_LE            \
 691    | CATEGORY_MASK_UTF_16_BE_NOSIG      \
 692    | CATEGORY_MASK_UTF_16_LE_NOSIG)
 693
 694
 695 /* List of symbols `coding-category-xxx' ordered by priority.  This
 696    variable is exposed to Emacs Lisp.  */
 697 static Lisp_Object Vcoding_category_list;
 698
 699 /* Table of coding categories (Lisp symbols).  This variable is for
 700    internal use oly.  */
 701 static Lisp_Object Vcoding_category_table;
 702
 703 /* Table of coding-categories ordered by priority.  */
 704 static enum coding_category coding_priorities[coding_category_max];
 705
 706 /* Nth element is a coding context for the coding system bound to the
 707    Nth coding category.  */
 708 static struct coding_system coding_categories[coding_category_max];
 709
 710 /*** Commonly used macros and functions ***/
 711
 712 #ifndef min
 713 #define min(a, b) ((a) < (b) ? (a) : (b))
 714 #endif
 715 #ifndef max
 716 #define max(a, b) ((a) > (b) ? (a) : (b))
 717 #endif
 718
 719 #define CODING_GET_INFO(coding, attrs, charset_list)    \
 720   do {                                                  \
 721     (attrs) = CODING_ID_ATTRS ((coding)->id);           \
 722     (charset_list) = CODING_ATTR_CHARSET_LIST (attrs);  \
 723   } while (0)
 724
 725
 726 /* Safely get one byte from the source text pointed by SRC which ends
 727    at SRC_END, and set C to that byte.  If there are not enough bytes
 728    in the source, it jumps to `no_more_source'.  If multibytep is
 729    nonzero, and a multibyte character is found at SRC, set C to the
 730    negative value of the character code.  The caller should declare
 731    and set these variables appropriately in advance:
 732         src, src_end, multibytep */
 733
 734 #define ONE_MORE_BYTE(c)                                \
 735   do {                                                  \
 736     if (src == src_end)                                 \
 737       {                                                 \
 738         if (src_base < src)                             \
 739           record_conversion_result                      \
 740             (coding, CODING_RESULT_INSUFFICIENT_SRC);   \
 741         goto no_more_source;                            \
 742       }                                                 \
 743     c = *src++;                                         \
 744     if (multibytep && (c & 0x80))                       \
 745       {                                                 \
 746         if ((c & 0xFE) == 0xC0)                         \
 747           c = ((c & 1) << 6) | *src++;                  \
 748         else                                            \
 749           {                                             \
 750             c = - string_char (--src, &src, NULL);      \
 751             record_conversion_result                    \
 752               (coding, CODING_RESULT_INVALID_SRC);      \
 753           }                                             \
 754       }                                                 \
 755     consumed_chars++;                                   \
 756   } while (0)
 757
 758
 759 #define ONE_MORE_BYTE_NO_CHECK(c)                       \
 760   do {                                                  \
 761     c = *src++;                                         \
 762     if (multibytep && (c & 0x80))                       \
 763       {                                                 \
 764         if ((c & 0xFE) == 0xC0)                         \
 765           c = ((c & 1) << 6) | *src++;                  \
 766         else                                            \
 767           {                                             \
 768             c = - string_char (--src, &src, NULL);      \
 769             record_conversion_result                    \
 770               (coding, CODING_RESULT_INVALID_SRC);      \
 771           }                                             \
 772       }                                                 \
 773     consumed_chars++;                                   \
 774   } while (0)
 775
 776
 777 /* Store a byte C in the place pointed by DST and increment DST to the
 778    next free point, and increment PRODUCED_CHARS.  The caller should
 779    assure that C is 0..127, and declare and set the variable `dst'
 780    appropriately in advance.
 781 */
 782
 783
 784 #define EMIT_ONE_ASCII_BYTE(c)  \
 785   do {                          \
 786     produced_chars++;           \
 787     *dst++ = (c);               \
 788   } while (0)
 789
 790
 791 /* Like EMIT_ONE_ASCII_BYTE byt store two bytes; C1 and C2.  */
 792
 793 #define EMIT_TWO_ASCII_BYTES(c1, c2)    \
 794   do {                                  \
 795     produced_chars += 2;                \
 796     *dst++ = (c1), *dst++ = (c2);       \
 797   } while (0)
 798
 799
 800 /* Store a byte C in the place pointed by DST and increment DST to the
 801    next free point, and increment PRODUCED_CHARS.  If MULTIBYTEP is
 802    nonzero, store in an appropriate multibyte from.  The caller should
 803    declare and set the variables `dst' and `multibytep' appropriately
 804    in advance.  */
 805
 806 #define EMIT_ONE_BYTE(c)                \
 807   do {                                  \
 808     produced_chars++;                   \
 809     if (multibytep)                     \
 810       {                                 \
 811         int ch = (c);                   \
 812         if (ch >= 0x80)                 \
 813           ch = BYTE8_TO_CHAR (ch);      \
 814         CHAR_STRING_ADVANCE (ch, dst);  \
 815       }                                 \
 816     else                                \
 817       *dst++ = (c);                     \
 818   } while (0)
 819
 820
 821 /* Like EMIT_ONE_BYTE, but emit two bytes; C1 and C2.  */
 822
 823 #define EMIT_TWO_BYTES(c1, c2)          \
 824   do {                                  \
 825     produced_chars += 2;                \
 826     if (multibytep)                     \
 827       {                                 \
 828         int ch;                         \
 829                                         \
 830         ch = (c1);                      \
 831         if (ch >= 0x80)                 \
 832           ch = BYTE8_TO_CHAR (ch);      \
 833         CHAR_STRING_ADVANCE (ch, dst);  \
 834         ch = (c2);                      \
 835         if (ch >= 0x80)                 \
 836           ch = BYTE8_TO_CHAR (ch);      \
 837         CHAR_STRING_ADVANCE (ch, dst);  \
 838       }                                 \
 839     else                                \
 840       {                                 \
 841         *dst++ = (c1);                  \
 842         *dst++ = (c2);                  \
 843       }                                 \
 844   } while (0)
 845
 846
 847 #define EMIT_THREE_BYTES(c1, c2, c3)    \
 848   do {                                  \
 849     EMIT_ONE_BYTE (c1);                 \
 850     EMIT_TWO_BYTES (c2, c3);            \
 851   } while (0)
 852
 853
 854 #define EMIT_FOUR_BYTES(c1, c2, c3, c4)         \
 855   do {                                          \
 856     EMIT_TWO_BYTES (c1, c2);                    \
 857     EMIT_TWO_BYTES (c3, c4);                    \
 858   } while (0)
 859
 860
 861 static void
 862 record_conversion_result (struct coding_system *coding,
 863                           enum coding_result_code result)
 864 {
 865   coding->result = result;
 866   switch (result)
 867     {
 868     case CODING_RESULT_INSUFFICIENT_SRC:
 869       Vlast_code_conversion_error = Qinsufficient_source;
 870       break;
 871     case CODING_RESULT_INCONSISTENT_EOL:
 872       Vlast_code_conversion_error = Qinconsistent_eol;
 873       break;
 874     case CODING_RESULT_INVALID_SRC:
 875       Vlast_code_conversion_error = Qinvalid_source;
 876       break;
 877     case CODING_RESULT_INTERRUPT:
 878       Vlast_code_conversion_error = Qinterrupted;
 879       break;
 880     case CODING_RESULT_INSUFFICIENT_MEM:
 881       Vlast_code_conversion_error = Qinsufficient_memory;
 882       break;
 883     }
 884 }
 885
 886 #define CODING_DECODE_CHAR(coding, src, src_base, src_end, charset, code, c) \
 887   do {                                                                       \
 888     charset_map_loaded = 0;                                                  \
 889     c = DECODE_CHAR (charset, code);                                         \
 890     if (charset_map_loaded)                                                  \
 891       {                                                                      \
 892         const unsigned char *orig = coding->source;                          \
 893         EMACS_INT offset;                                                    \
 894                                                                              \
 895         coding_set_source (coding);                                          \
 896         offset = coding->source - orig;                                      \
 897         src += offset;                                                       \
 898         src_base += offset;                                                  \
 899         src_end += offset;                                                   \
 900       }                                                                      \
 901   } while (0)
 902
 903
 904 #define ASSURE_DESTINATION(bytes)                               \
 905   do {                                                          \
 906     if (dst + (bytes) >= dst_end)                               \
 907       {                                                         \
 908         int more_bytes = charbuf_end - charbuf + (bytes);       \
 909                                                                 \
 910         dst = alloc_destination (coding, more_bytes, dst);      \
 911         dst_end = coding->destination + coding->dst_bytes;      \
 912       }                                                         \
 913   } while (0)
 914
 915
 916
 917 static void
 918 coding_set_source (coding)
 919      struct coding_system *coding;
 920 {
 921   if (BUFFERP (coding->src_object))
 922     {
 923       struct buffer *buf = XBUFFER (coding->src_object);
 924
 925       if (coding->src_pos < 0)
 926         coding->source = BUF_GAP_END_ADDR (buf) + coding->src_pos_byte;
 927       else
 928         coding->source = BUF_BYTE_ADDRESS (buf, coding->src_pos_byte);
 929     }
 930   else if (STRINGP (coding->src_object))
 931     {
 932       coding->source = SDATA (coding->src_object) + coding->src_pos_byte;
 933     }
 934   else
 935     /* Otherwise, the source is C string and is never relocated
 936        automatically.  Thus we don't have to update anything.  */
 937     ;
 938 }
 939
 940 static void
 941 coding_set_destination (coding)
 942      struct coding_system *coding;
 943 {
 944   if (BUFFERP (coding->dst_object))
 945     {
 946       if (coding->src_pos < 0)
 947         {
 948           coding->destination = BEG_ADDR + coding->dst_pos_byte - 1;
 949           coding->dst_bytes = (GAP_END_ADDR
 950                                - (coding->src_bytes - coding->consumed)
 951                                - coding->destination);
 952         }
 953       else
 954         {
 955           /* We are sure that coding->dst_pos_byte is before the gap
 956              of the buffer. */
 957           coding->destination = (BUF_BEG_ADDR (XBUFFER (coding->dst_object))
 958                                  + coding->dst_pos_byte - 1);
 959           coding->dst_bytes = (BUF_GAP_END_ADDR (XBUFFER (coding->dst_object))
 960                                - coding->destination);
 961         }
 962     }
 963   else
 964     /* Otherwise, the destination is C string and is never relocated
 965        automatically.  Thus we don't have to update anything.  */
 966     ;
 967 }
 968
 969
 970 static void
 971 coding_alloc_by_realloc (coding, bytes)
 972      struct coding_system *coding;
 973      EMACS_INT bytes;
 974 {
 975   coding->destination = (unsigned char *) xrealloc (coding->destination,
 976                                                     coding->dst_bytes + bytes);
 977   coding->dst_bytes += bytes;
 978 }
 979
 980 static void
 981 coding_alloc_by_making_gap (coding, bytes)
 982      struct coding_system *coding;
 983      EMACS_INT bytes;
 984 {
 985   if (BUFFERP (coding->dst_object)
 986       && EQ (coding->src_object, coding->dst_object))
 987     {
 988       EMACS_INT add = coding->src_bytes - coding->consumed;
 989
 990       GAP_SIZE -= add; ZV += add; Z += add; ZV_BYTE += add; Z_BYTE += add;
 991       make_gap (bytes);
 992       GAP_SIZE += add; ZV -= add; Z -= add; ZV_BYTE -= add; Z_BYTE -= add;
 993     }
 994   else
 995     {
 996       Lisp_Object this_buffer;
 997
 998       this_buffer = Fcurrent_buffer ();
 999       set_buffer_internal (XBUFFER (coding->dst_object));
1000       make_gap (bytes);
1001       set_buffer_internal (XBUFFER (this_buffer));
1002     }
1003 }
1004
1005
1006 static unsigned char *
1007 alloc_destination (coding, nbytes, dst)
1008      struct coding_system *coding;
1009      EMACS_INT nbytes;
1010      unsigned char *dst;
1011 {
1012   EMACS_INT offset = dst - coding->destination;
1013
1014   if (BUFFERP (coding->dst_object))
1015     coding_alloc_by_making_gap (coding, nbytes);
1016   else
1017     coding_alloc_by_realloc (coding, nbytes);
1018   record_conversion_result (coding, CODING_RESULT_SUCCESS);
1019   coding_set_destination (coding);
1020   dst = coding->destination + offset;
1021   return dst;
1022 }
1023
1024 /** Macros for annotations.  */
1025
1026 /* Maximum length of annotation data (sum of annotations for
1027    composition and charset).  */
1028 #define MAX_ANNOTATION_LENGTH (4 + (MAX_COMPOSITION_COMPONENTS * 2) - 1 + 4)
1029
1030 /* An annotation data is stored in the array coding->charbuf in this
1031    format:
1032      [ -LENGTH ANNOTATION_MASK NCHARS ... ]
1033    LENGTH is the number of elements in the annotation.
1034    ANNOTATION_MASK is one of CODING_ANNOTATE_XXX_MASK.
1035    NCHARS is the number of characters in the text annotated.
1036
1037    The format of the following elements depend on ANNOTATION_MASK.
1038
1039    In the case of CODING_ANNOTATE_COMPOSITION_MASK, these elements
1040    follows:
1041      ... METHOD [ COMPOSITION-COMPONENTS ... ]
1042    METHOD is one of enum composition_method.
1043    Optionnal COMPOSITION-COMPONENTS are characters and composition
1044    rules.
1045
1046    In the case of CODING_ANNOTATE_CHARSET_MASK, one element CHARSET-ID
1047    follows.  */
1048
1049 #define ADD_ANNOTATION_DATA(buf, len, mask, nchars)     \
1050   do {                                                  \
1051     *(buf)++ = -(len);                                  \
1052     *(buf)++ = (mask);                                  \
1053     *(buf)++ = (nchars);                                \
1054     coding->annotated = 1;                              \
1055   } while (0);
1056
1057 #define ADD_COMPOSITION_DATA(buf, nchars, method)                           \
1058   do {                                                                      \
1059     ADD_ANNOTATION_DATA (buf, 4, CODING_ANNOTATE_COMPOSITION_MASK, nchars); \
1060     *buf++ = method;                                                        \
1061   } while (0)
1062
1063
1064 #define ADD_CHARSET_DATA(buf, nchars, id)                               \
1065   do {                                                                  \
1066     ADD_ANNOTATION_DATA (buf, 4, CODING_ANNOTATE_CHARSET_MASK, nchars); \
1067     *buf++ = id;                                                        \
1068   } while (0)
1069
1070 \f
1071 /*** 2. Emacs' internal format (emacs-utf-8) ***/
1072
1073
1074
1075 \f
1076 /*** 3. UTF-8 ***/
1077
1078 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
1079    Check if a text is encoded in UTF-8.  If it is, return 1, else
1080    return 0.  */
1081
1082 #define UTF_8_1_OCTET_P(c)         ((c) < 0x80)
1083 #define UTF_8_EXTRA_OCTET_P(c)     (((c) & 0xC0) == 0x80)
1084 #define UTF_8_2_OCTET_LEADING_P(c) (((c) & 0xE0) == 0xC0)
1085 #define UTF_8_3_OCTET_LEADING_P(c) (((c) & 0xF0) == 0xE0)
1086 #define UTF_8_4_OCTET_LEADING_P(c) (((c) & 0xF8) == 0xF0)
1087 #define UTF_8_5_OCTET_LEADING_P(c) (((c) & 0xFC) == 0xF8)
1088
1089 static int
1090 detect_coding_utf_8 (coding, detect_info)
1091      struct coding_system *coding;
1092      struct coding_detection_info *detect_info;
1093 {
1094   const unsigned char *src = coding->source, *src_base;
1095   const unsigned char *src_end = coding->source + coding->src_bytes;
1096   int multibytep = coding->src_multibyte;
1097   int consumed_chars = 0;
1098   int found = 0;
1099
1100   detect_info->checked |= CATEGORY_MASK_UTF_8;
1101   /* A coding system of this category is always ASCII compatible.  */
1102   src += coding->head_ascii;
1103
1104   while (1)
1105     {
1106       int c, c1, c2, c3, c4;
1107
1108       src_base = src;
1109       ONE_MORE_BYTE (c);
1110       if (c < 0 || UTF_8_1_OCTET_P (c))
1111         continue;
1112       ONE_MORE_BYTE (c1);
1113       if (c1 < 0 || ! UTF_8_EXTRA_OCTET_P (c1))
1114         break;
1115       if (UTF_8_2_OCTET_LEADING_P (c))
1116         {
1117           found = CATEGORY_MASK_UTF_8;
1118           continue;
1119         }
1120       ONE_MORE_BYTE (c2);
1121       if (c2 < 0 || ! UTF_8_EXTRA_OCTET_P (c2))
1122         break;
1123       if (UTF_8_3_OCTET_LEADING_P (c))
1124         {
1125           found = CATEGORY_MASK_UTF_8;
1126           continue;
1127         }
1128       ONE_MORE_BYTE (c3);
1129       if (c3 < 0 || ! UTF_8_EXTRA_OCTET_P (c3))
1130         break;
1131       if (UTF_8_4_OCTET_LEADING_P (c))
1132         {
1133           found = CATEGORY_MASK_UTF_8;
1134           continue;
1135         }
1136       ONE_MORE_BYTE (c4);
1137       if (c4 < 0 || ! UTF_8_EXTRA_OCTET_P (c4))
1138         break;
1139       if (UTF_8_5_OCTET_LEADING_P (c))
1140         {
1141           found = CATEGORY_MASK_UTF_8;
1142           continue;
1143         }
1144       break;
1145     }
1146   detect_info->rejected |= CATEGORY_MASK_UTF_8;
1147   return 0;
1148
1149  no_more_source:
1150   if (src_base < src && coding->mode & CODING_MODE_LAST_BLOCK)
1151     {
1152       detect_info->rejected |= CATEGORY_MASK_UTF_8;
1153       return 0;
1154     }
1155   detect_info->found |= found;
1156   return 1;
1157 }
1158
1159
1160 static void
1161 decode_coding_utf_8 (coding)
1162      struct coding_system *coding;
1163 {
1164   const unsigned char *src = coding->source + coding->consumed;
1165   const unsigned char *src_end = coding->source + coding->src_bytes;
1166   const unsigned char *src_base;
1167   int *charbuf = coding->charbuf + coding->charbuf_used;
1168   int *charbuf_end = coding->charbuf + coding->charbuf_size;
1169   int consumed_chars = 0, consumed_chars_base;
1170   int multibytep = coding->src_multibyte;
1171   Lisp_Object attr, charset_list;
1172
1173   CODING_GET_INFO (coding, attr, charset_list);
1174
1175   while (1)
1176     {
1177       int c, c1, c2, c3, c4, c5;
1178
1179       src_base = src;
1180       consumed_chars_base = consumed_chars;
1181
1182       if (charbuf >= charbuf_end)
1183         break;
1184
1185       ONE_MORE_BYTE (c1);
1186       if (c1 < 0)
1187         {
1188           c = - c1;
1189         }
1190       else if (UTF_8_1_OCTET_P(c1))
1191         {
1192           c = c1;
1193         }
1194       else
1195         {
1196           ONE_MORE_BYTE (c2);
1197           if (c2 < 0 || ! UTF_8_EXTRA_OCTET_P (c2))
1198             goto invalid_code;
1199           if (UTF_8_2_OCTET_LEADING_P (c1))
1200             {
1201               c = ((c1 & 0x1F) << 6) | (c2 & 0x3F);
1202               /* Reject overlong sequences here and below.  Encoders
1203                  producing them are incorrect, they can be misleading,
1204                  and they mess up read/write invariance.  */
1205               if (c < 128)
1206                 goto invalid_code;
1207             }
1208           else
1209             {
1210               ONE_MORE_BYTE (c3);
1211               if (c3 < 0 || ! UTF_8_EXTRA_OCTET_P (c3))
1212                 goto invalid_code;
1213               if (UTF_8_3_OCTET_LEADING_P (c1))
1214                 {
1215                   c = (((c1 & 0xF) << 12)
1216                        | ((c2 & 0x3F) << 6) | (c3 & 0x3F));
1217                   if (c < 0x800
1218                       || (c >= 0xd800 && c < 0xe000)) /* surrogates (invalid) */
1219                     goto invalid_code;
1220                 }
1221               else
1222                 {
1223                   ONE_MORE_BYTE (c4);
1224                   if (c4 < 0 || ! UTF_8_EXTRA_OCTET_P (c4))
1225                     goto invalid_code;
1226                   if (UTF_8_4_OCTET_LEADING_P (c1))
1227                     {
1228                     c = (((c1 & 0x7) << 18) | ((c2 & 0x3F) << 12)
1229                          | ((c3 & 0x3F) << 6) | (c4 & 0x3F));
1230                     if (c < 0x10000)
1231                       goto invalid_code;
1232                     }
1233                   else
1234                     {
1235                       ONE_MORE_BYTE (c5);
1236                       if (c5 < 0 || ! UTF_8_EXTRA_OCTET_P (c5))
1237                         goto invalid_code;
1238                       if (UTF_8_5_OCTET_LEADING_P (c1))
1239                         {
1240                           c = (((c1 & 0x3) << 24) | ((c2 & 0x3F) << 18)
1241                                | ((c3 & 0x3F) << 12) | ((c4 & 0x3F) << 6)
1242                                | (c5 & 0x3F));
1243                           if ((c > MAX_CHAR) || (c < 0x200000))
1244                             goto invalid_code;
1245                         }
1246                       else
1247                         goto invalid_code;
1248                     }
1249                 }
1250             }
1251         }
1252
1253       *charbuf++ = c;
1254       continue;
1255
1256     invalid_code:
1257       src = src_base;
1258       consumed_chars = consumed_chars_base;
1259       ONE_MORE_BYTE (c);
1260       *charbuf++ = ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
1261       coding->errors++;
1262     }
1263
1264  no_more_source:
1265   coding->consumed_char += consumed_chars_base;
1266   coding->consumed = src_base - coding->source;
1267   coding->charbuf_used = charbuf - coding->charbuf;
1268 }
1269
1270
1271 static int
1272 encode_coding_utf_8 (coding)
1273      struct coding_system *coding;
1274 {
1275   int multibytep = coding->dst_multibyte;
1276   int *charbuf = coding->charbuf;
1277   int *charbuf_end = charbuf + coding->charbuf_used;
1278   unsigned char *dst = coding->destination + coding->produced;
1279   unsigned char *dst_end = coding->destination + coding->dst_bytes;
1280   int produced_chars = 0;
1281   int c;
1282
1283   if (multibytep)
1284     {
1285       int safe_room = MAX_MULTIBYTE_LENGTH * 2;
1286
1287       while (charbuf < charbuf_end)
1288         {
1289           unsigned char str[MAX_MULTIBYTE_LENGTH], *p, *pend = str;
1290
1291           ASSURE_DESTINATION (safe_room);
1292           c = *charbuf++;
1293           if (CHAR_BYTE8_P (c))
1294             {
1295               c = CHAR_TO_BYTE8 (c);
1296               EMIT_ONE_BYTE (c);
1297             }
1298           else
1299             {
1300               CHAR_STRING_ADVANCE (c, pend);
1301               for (p = str; p < pend; p++)
1302                 EMIT_ONE_BYTE (*p);
1303             }
1304         }
1305     }
1306   else
1307     {
1308       int safe_room = MAX_MULTIBYTE_LENGTH;
1309
1310       while (charbuf < charbuf_end)
1311         {
1312           ASSURE_DESTINATION (safe_room);
1313           c = *charbuf++;
1314           dst += CHAR_STRING (c, dst);
1315           produced_chars++;
1316         }
1317     }
1318   record_conversion_result (coding, CODING_RESULT_SUCCESS);
1319   coding->produced_char += produced_chars;
1320   coding->produced = dst - coding->destination;
1321   return 0;
1322 }
1323
1324
1325 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
1326    Check if a text is encoded in one of UTF-16 based coding systems.
1327    If it is, return 1, else return 0.  */
1328
1329 #define UTF_16_HIGH_SURROGATE_P(val) \
1330   (((val) & 0xFC00) == 0xD800)
1331
1332 #define UTF_16_LOW_SURROGATE_P(val) \
1333   (((val) & 0xFC00) == 0xDC00)
1334
1335 #define UTF_16_INVALID_P(val)   \
1336   (((val) == 0xFFFE)            \
1337    || ((val) == 0xFFFF)         \
1338    || UTF_16_LOW_SURROGATE_P (val))
1339
1340
1341 static int
1342 detect_coding_utf_16 (coding, detect_info)
1343      struct coding_system *coding;
1344      struct coding_detection_info *detect_info;
1345 {
1346   const unsigned char *src = coding->source, *src_base = src;
1347   const unsigned char *src_end = coding->source + coding->src_bytes;
1348   int multibytep = coding->src_multibyte;
1349   int consumed_chars = 0;
1350   int c1, c2;
1351
1352   detect_info->checked |= CATEGORY_MASK_UTF_16;
1353   if (coding->mode & CODING_MODE_LAST_BLOCK
1354       && (coding->src_chars & 1))
1355     {
1356       detect_info->rejected |= CATEGORY_MASK_UTF_16;
1357       return 0;
1358     }
1359
1360   ONE_MORE_BYTE (c1);
1361   ONE_MORE_BYTE (c2);
1362   if ((c1 == 0xFF) && (c2 == 0xFE))
1363     {
1364       detect_info->found |= (CATEGORY_MASK_UTF_16_LE
1365                              | CATEGORY_MASK_UTF_16_AUTO);
1366       detect_info->rejected |= (CATEGORY_MASK_UTF_16_BE
1367                                 | CATEGORY_MASK_UTF_16_BE_NOSIG
1368                                 | CATEGORY_MASK_UTF_16_LE_NOSIG);
1369     }
1370   else if ((c1 == 0xFE) && (c2 == 0xFF))
1371     {
1372       detect_info->found |= (CATEGORY_MASK_UTF_16_BE
1373                              | CATEGORY_MASK_UTF_16_AUTO);
1374       detect_info->rejected |= (CATEGORY_MASK_UTF_16_LE
1375                                 | CATEGORY_MASK_UTF_16_BE_NOSIG
1376                                 | CATEGORY_MASK_UTF_16_LE_NOSIG);
1377     }
1378   else if (c1 >= 0 && c2 >= 0)
1379     {
1380       unsigned char b1[256], b2[256];
1381       int b1_variants = 1, b2_variants = 1;
1382       int n;
1383
1384       bzero (b1, 256), bzero (b2, 256);
1385       b1[c1]++, b2[c2]++;
1386       for (n = 0; n < 256 && src < src_end; n++)
1387         {
1388           src_base = src;
1389           ONE_MORE_BYTE (c1);
1390           ONE_MORE_BYTE (c2);
1391           if (c1 < 0 || c2 < 0)
1392             break;
1393           if (! b1[c1++]) b1_variants++;
1394           if (! b2[c2++]) b2_variants++;
1395         }
1396       if (b1_variants < b2_variants)
1397         detect_info->found |= CATEGORY_MASK_UTF_16_BE_NOSIG;
1398       else
1399         detect_info->found |= CATEGORY_MASK_UTF_16_LE_NOSIG;
1400       detect_info->rejected
1401         |= (CATEGORY_MASK_UTF_16_BE | CATEGORY_MASK_UTF_16_LE);
1402     }
1403  no_more_source:
1404   return 1;
1405 }
1406
1407 static void
1408 decode_coding_utf_16 (coding)
1409      struct coding_system *coding;
1410 {
1411   const unsigned char *src = coding->source + coding->consumed;
1412   const unsigned char *src_end = coding->source + coding->src_bytes;
1413   const unsigned char *src_base;
1414   int *charbuf = coding->charbuf + coding->charbuf_used;
1415   int *charbuf_end = coding->charbuf + coding->charbuf_size;
1416   int consumed_chars = 0, consumed_chars_base;
1417   int multibytep = coding->src_multibyte;
1418   enum utf_16_bom_type bom = CODING_UTF_16_BOM (coding);
1419   enum utf_16_endian_type endian = CODING_UTF_16_ENDIAN (coding);
1420   int surrogate = CODING_UTF_16_SURROGATE (coding);
1421   Lisp_Object attr, charset_list;
1422
1423   CODING_GET_INFO (coding, attr, charset_list);
1424
1425   if (bom == utf_16_with_bom)
1426     {
1427       int c, c1, c2;
1428
1429       src_base = src;
1430       ONE_MORE_BYTE (c1);
1431       ONE_MORE_BYTE (c2);
1432       c = (c1 << 8) | c2;
1433
1434       if (endian == utf_16_big_endian
1435           ? c != 0xFEFF : c != 0xFFFE)
1436         {
1437           /* The first two bytes are not BOM.  Treat them as bytes
1438              for a normal character.  */
1439           src = src_base;
1440           coding->errors++;
1441         }
1442       CODING_UTF_16_BOM (coding) = utf_16_without_bom;
1443     }
1444   else if (bom == utf_16_detect_bom)
1445     {
1446       /* We have already tried to detect BOM and failed in
1447          detect_coding.  */
1448       CODING_UTF_16_BOM (coding) = utf_16_without_bom;
1449     }
1450
1451   while (1)
1452     {
1453       int c, c1, c2;
1454
1455       src_base = src;
1456       consumed_chars_base = consumed_chars;
1457
1458       if (charbuf + 2 >= charbuf_end)
1459         break;
1460
1461       ONE_MORE_BYTE (c1);
1462       if (c1 < 0)
1463         {
1464           *charbuf++ = -c1;
1465           continue;
1466         }
1467       ONE_MORE_BYTE (c2);
1468       if (c2 < 0)
1469         {
1470           *charbuf++ = ASCII_BYTE_P (c1) ? c1 : BYTE8_TO_CHAR (c1);
1471           *charbuf++ = -c2;
1472           continue;
1473         }
1474       c = (endian == utf_16_big_endian
1475            ? ((c1 << 8) | c2) : ((c2 << 8) | c1));
1476       if (surrogate)
1477         {
1478           if (! UTF_16_LOW_SURROGATE_P (c))
1479             {
1480               if (endian == utf_16_big_endian)
1481                 c1 = surrogate >> 8, c2 = surrogate & 0xFF;
1482               else
1483                 c1 = surrogate & 0xFF, c2 = surrogate >> 8;
1484               *charbuf++ = c1;
1485               *charbuf++ = c2;
1486               coding->errors++;
1487               if (UTF_16_HIGH_SURROGATE_P (c))
1488                 CODING_UTF_16_SURROGATE (coding) = surrogate = c;
1489               else
1490                 *charbuf++ = c;
1491             }
1492           else
1493             {
1494               c = ((surrogate - 0xD800) << 10) | (c - 0xDC00);
1495               CODING_UTF_16_SURROGATE (coding) = surrogate = 0;
1496               *charbuf++ = 0x10000 + c;
1497             }
1498         }
1499       else
1500         {
1501           if (UTF_16_HIGH_SURROGATE_P (c))
1502             CODING_UTF_16_SURROGATE (coding) = surrogate = c;
1503           else
1504             *charbuf++ = c;
1505         }
1506     }
1507
1508  no_more_source:
1509   coding->consumed_char += consumed_chars_base;
1510   coding->consumed = src_base - coding->source;
1511   coding->charbuf_used = charbuf - coding->charbuf;
1512 }
1513
1514 static int
1515 encode_coding_utf_16 (coding)
1516      struct coding_system *coding;
1517 {
1518   int multibytep = coding->dst_multibyte;
1519   int *charbuf = coding->charbuf;
1520   int *charbuf_end = charbuf + coding->charbuf_used;
1521   unsigned char *dst = coding->destination + coding->produced;
1522   unsigned char *dst_end = coding->destination + coding->dst_bytes;
1523   int safe_room = 8;
1524   enum utf_16_bom_type bom = CODING_UTF_16_BOM (coding);
1525   int big_endian = CODING_UTF_16_ENDIAN (coding) == utf_16_big_endian;
1526   int produced_chars = 0;
1527   Lisp_Object attrs, charset_list;
1528   int c;
1529
1530   CODING_GET_INFO (coding, attrs, charset_list);
1531
1532   if (bom != utf_16_without_bom)
1533     {
1534       ASSURE_DESTINATION (safe_room);
1535       if (big_endian)
1536         EMIT_TWO_BYTES (0xFE, 0xFF);
1537       else
1538         EMIT_TWO_BYTES (0xFF, 0xFE);
1539       CODING_UTF_16_BOM (coding) = utf_16_without_bom;
1540     }
1541
1542   while (charbuf < charbuf_end)
1543     {
1544       ASSURE_DESTINATION (safe_room);
1545       c = *charbuf++;
1546       if (c >= MAX_UNICODE_CHAR)
1547         c = coding->default_char;
1548
1549       if (c < 0x10000)
1550         {
1551           if (big_endian)
1552             EMIT_TWO_BYTES (c >> 8, c & 0xFF);
1553           else
1554             EMIT_TWO_BYTES (c & 0xFF, c >> 8);
1555         }
1556       else
1557         {
1558           int c1, c2;
1559
1560           c -= 0x10000;
1561           c1 = (c >> 10) + 0xD800;
1562           c2 = (c & 0x3FF) + 0xDC00;
1563           if (big_endian)
1564             EMIT_FOUR_BYTES (c1 >> 8, c1 & 0xFF, c2 >> 8, c2 & 0xFF);
1565           else
1566             EMIT_FOUR_BYTES (c1 & 0xFF, c1 >> 8, c2 & 0xFF, c2 >> 8);
1567         }
1568     }
1569   record_conversion_result (coding, CODING_RESULT_SUCCESS);
1570   coding->produced = dst - coding->destination;
1571   coding->produced_char += produced_chars;
1572   return 0;
1573 }
1574
1575 \f
1576 /*** 6. Old Emacs' internal format (emacs-mule) ***/
1577
1578 /* Emacs' internal format for representation of multiple character
1579    sets is a kind of multi-byte encoding, i.e. characters are
1580    represented by variable-length sequences of one-byte codes.
1581
1582    ASCII characters and control characters (e.g. `tab', `newline') are
1583    represented by one-byte sequences which are their ASCII codes, in
1584    the range 0x00 through 0x7F.
1585
1586    8-bit characters of the range 0x80..0x9F are represented by
1587    two-byte sequences of LEADING_CODE_8_BIT_CONTROL and (their 8-bit
1588    code + 0x20).
1589
1590    8-bit characters of the range 0xA0..0xFF are represented by
1591    one-byte sequences which are their 8-bit code.
1592
1593    The other characters are represented by a sequence of `base
1594    leading-code', optional `extended leading-code', and one or two
1595    `position-code's.  The length of the sequence is determined by the
1596    base leading-code.  Leading-code takes the range 0x81 through 0x9D,
1597    whereas extended leading-code and position-code take the range 0xA0
1598    through 0xFF.  See `charset.h' for more details about leading-code
1599    and position-code.
1600
1601    --- CODE RANGE of Emacs' internal format ---
1602    character set        range
1603    -------------        -----
1604    ascii                0x00..0x7F
1605    eight-bit-control    LEADING_CODE_8_BIT_CONTROL + 0xA0..0xBF
1606    eight-bit-graphic    0xA0..0xBF
1607    ELSE                 0x81..0x9D + [0xA0..0xFF]+
1608    ---------------------------------------------
1609
1610    As this is the internal character representation, the format is
1611    usually not used externally (i.e. in a file or in a data sent to a
1612    process).  But, it is possible to have a text externally in this
1613    format (i.e. by encoding by the coding system `emacs-mule').
1614
1615    In that case, a sequence of one-byte codes has a slightly different
1616    form.
1617
1618    At first, all characters in eight-bit-control are represented by
1619    one-byte sequences which are their 8-bit code.
1620
1621    Next, character composition data are represented by the byte
1622    sequence of the form: 0x80 METHOD BYTES CHARS COMPONENT ...,
1623    where,
1624         METHOD is 0xF0 plus one of composition method (enum
1625         composition_method),
1626
1627         BYTES is 0xA0 plus a byte length of this composition data,
1628
1629         CHARS is 0x20 plus a number of characters composed by this
1630         data,
1631
1632         COMPONENTs are characters of multibye form or composition
1633         rules encoded by two-byte of ASCII codes.
1634
1635    In addition, for backward compatibility, the following formats are
1636    also recognized as composition data on decoding.
1637
1638    0x80 MSEQ ...
1639    0x80 0xFF MSEQ RULE MSEQ RULE ... MSEQ
1640
1641    Here,
1642         MSEQ is a multibyte form but in these special format:
1643           ASCII: 0xA0 ASCII_CODE+0x80,
1644           other: LEADING_CODE+0x20 FOLLOWING-BYTE ...,
1645         RULE is a one byte code of the range 0xA0..0xF0 that
1646         represents a composition rule.
1647   */
1648
1649 char emacs_mule_bytes[256];
1650
1651 int
1652 emacs_mule_char (coding, src, nbytes, nchars, id)
1653      struct coding_system *coding;
1654      const unsigned char *src;
1655      int *nbytes, *nchars, *id;
1656 {
1657   const unsigned char *src_end = coding->source + coding->src_bytes;
1658   const unsigned char *src_base = src;
1659   int multibytep = coding->src_multibyte;
1660   struct charset *charset;
1661   unsigned code;
1662   int c;
1663   int consumed_chars = 0;
1664
1665   ONE_MORE_BYTE (c);
1666   if (c < 0)
1667     {
1668       c = -c;
1669       charset = emacs_mule_charset[0];
1670     }
1671   else
1672     {
1673       switch (emacs_mule_bytes[c])
1674         {
1675         case 2:
1676           if (! (charset = emacs_mule_charset[c]))
1677             goto invalid_code;
1678           ONE_MORE_BYTE (c);
1679           if (c < 0)
1680             goto invalid_code;
1681           code = c & 0x7F;
1682           break;
1683
1684         case 3:
1685           if (c == EMACS_MULE_LEADING_CODE_PRIVATE_11
1686               || c == EMACS_MULE_LEADING_CODE_PRIVATE_12)
1687             {
1688               ONE_MORE_BYTE (c);
1689               if (c < 0 || ! (charset = emacs_mule_charset[c]))
1690                 goto invalid_code;
1691               ONE_MORE_BYTE (c);
1692               if (c < 0)
1693                 goto invalid_code;
1694               code = c & 0x7F;
1695             }
1696           else
1697             {
1698               if (! (charset = emacs_mule_charset[c]))
1699                 goto invalid_code;
1700               ONE_MORE_BYTE (c);
1701               if (c < 0)
1702                 goto invalid_code;
1703               code = (c & 0x7F) << 8;
1704               ONE_MORE_BYTE (c);
1705               if (c < 0)
1706                 goto invalid_code;
1707               code |= c & 0x7F;
1708             }
1709           break;
1710
1711         case 4:
1712           ONE_MORE_BYTE (c);
1713           if (c < 0 || ! (charset = emacs_mule_charset[c]))
1714             goto invalid_code;
1715           ONE_MORE_BYTE (c);
1716           if (c < 0)
1717             goto invalid_code;
1718           code = (c & 0x7F) << 8;
1719           ONE_MORE_BYTE (c);
1720           if (c < 0)
1721             goto invalid_code;
1722           code |= c & 0x7F;
1723           break;
1724
1725         case 1:
1726           code = c;
1727           charset = CHARSET_FROM_ID (ASCII_BYTE_P (code)
1728                                      ? charset_ascii : charset_eight_bit);
1729           break;
1730
1731         default:
1732           abort ();
1733         }
1734       c = DECODE_CHAR (charset, code);
1735       if (c < 0)
1736         goto invalid_code;
1737     }
1738   *nbytes = src - src_base;
1739   *nchars = consumed_chars;
1740   if (id)
1741     *id = charset->id;
1742   return c;
1743
1744  no_more_source:
1745   return -2;
1746
1747  invalid_code:
1748   return -1;
1749 }
1750
1751
1752 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
1753    Check if a text is encoded in `emacs-mule'.  If it is, return 1,
1754    else return 0.  */
1755
1756 static int
1757 detect_coding_emacs_mule (coding, detect_info)
1758      struct coding_system *coding;
1759      struct coding_detection_info *detect_info;
1760 {
1761   const unsigned char *src = coding->source, *src_base;
1762   const unsigned char *src_end = coding->source + coding->src_bytes;
1763   int multibytep = coding->src_multibyte;
1764   int consumed_chars = 0;
1765   int c;
1766   int found = 0;
1767
1768   detect_info->checked |= CATEGORY_MASK_EMACS_MULE;
1769   /* A coding system of this category is always ASCII compatible.  */
1770   src += coding->head_ascii;
1771
1772   while (1)
1773     {
1774       src_base = src;
1775       ONE_MORE_BYTE (c);
1776       if (c < 0)
1777         continue;
1778       if (c == 0x80)
1779         {
1780           /* Perhaps the start of composite character.  We simple skip
1781              it because analyzing it is too heavy for detecting.  But,
1782              at least, we check that the composite character
1783              constitues of more than 4 bytes.  */
1784           const unsigned char *src_base;
1785
1786         repeat:
1787           src_base = src;
1788           do
1789             {
1790               ONE_MORE_BYTE (c);
1791             }
1792           while (c >= 0xA0);
1793
1794           if (src - src_base <= 4)
1795             break;
1796           found = CATEGORY_MASK_EMACS_MULE;
1797           if (c == 0x80)
1798             goto repeat;
1799         }
1800
1801       if (c < 0x80)
1802         {
1803           if (c < 0x20
1804               && (c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO))
1805             break;
1806         }
1807       else
1808         {
1809           int more_bytes = emacs_mule_bytes[*src_base] - 1;
1810
1811           while (more_bytes > 0)
1812             {
1813               ONE_MORE_BYTE (c);
1814               if (c < 0xA0)
1815                 {
1816                   src--;        /* Unread the last byte.  */
1817                   break;
1818                 }
1819               more_bytes--;
1820             }
1821           if (more_bytes != 0)
1822             break;
1823           found = CATEGORY_MASK_EMACS_MULE;
1824         }
1825     }
1826   detect_info->rejected |= CATEGORY_MASK_EMACS_MULE;
1827   return 0;
1828
1829  no_more_source:
1830   if (src_base < src && coding->mode & CODING_MODE_LAST_BLOCK)
1831     {
1832       detect_info->rejected |= CATEGORY_MASK_EMACS_MULE;
1833       return 0;
1834     }
1835   detect_info->found |= found;
1836   return 1;
1837 }
1838
1839
1840 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".  */
1841
1842 /* Decode a character represented as a component of composition
1843    sequence of Emacs 20/21 style at SRC.  Set C to that character and
1844    update SRC to the head of next character (or an encoded composition
1845    rule).  If SRC doesn't points a composition component, set C to -1.
1846    If SRC points an invalid byte sequence, global exit by a return
1847    value 0.  */
1848
1849 #define DECODE_EMACS_MULE_COMPOSITION_CHAR(buf)                 \
1850   if (1)                                                        \
1851     {                                                           \
1852       int c;                                                    \
1853       int nbytes, nchars;                                       \
1854                                                                 \
1855       if (src == src_end)                                       \
1856         break;                                                  \
1857       c = emacs_mule_char (coding, src, &nbytes, &nchars, NULL);\
1858       if (c < 0)                                                \
1859         {                                                       \
1860           if (c == -2)                                          \
1861             break;                                              \
1862           goto invalid_code;                                    \
1863         }                                                       \
1864       *buf++ = c;                                               \
1865       src += nbytes;                                            \
1866       consumed_chars += nchars;                                 \
1867     }                                                           \
1868   else
1869
1870
1871 /* Decode a composition rule represented as a component of composition
1872    sequence of Emacs 20 style at SRC.  Store the decoded rule in *BUF,
1873    and increment BUF.  If SRC points an invalid byte sequence, set C
1874    to -1.  */
1875
1876 #define DECODE_EMACS_MULE_COMPOSITION_RULE_20(buf)      \
1877   do {                                                  \
1878     int c, gref, nref;                                  \
1879                                                         \
1880     if (src >= src_end)                                 \
1881       goto invalid_code;                                \
1882     ONE_MORE_BYTE_NO_CHECK (c);                         \
1883     c -= 0x20;                                          \
1884     if (c < 0 || c >= 81)                               \
1885       goto invalid_code;                                \
1886                                                         \
1887     gref = c / 9, nref = c % 9;                         \
1888     *buf++ = COMPOSITION_ENCODE_RULE (gref, nref);      \
1889   } while (0)
1890
1891
1892 /* Decode a composition rule represented as a component of composition
1893    sequence of Emacs 21 style at SRC.  Store the decoded rule in *BUF,
1894    and increment BUF.  If SRC points an invalid byte sequence, set C
1895    to -1.  */
1896
1897 #define DECODE_EMACS_MULE_COMPOSITION_RULE_21(buf)      \
1898   do {                                                  \
1899     int gref, nref;                                     \
1900                                                         \
1901     if (src + 1>= src_end)                              \
1902       goto invalid_code;                                \
1903     ONE_MORE_BYTE_NO_CHECK (gref);                      \
1904     gref -= 0x20;                                       \
1905     ONE_MORE_BYTE_NO_CHECK (nref);                      \
1906     nref -= 0x20;                                       \
1907     if (gref < 0 || gref >= 81                          \
1908         || nref < 0 || nref >= 81)                      \
1909       goto invalid_code;                                \
1910     *buf++ = COMPOSITION_ENCODE_RULE (gref, nref);      \
1911   } while (0)
1912
1913
1914 #define DECODE_EMACS_MULE_21_COMPOSITION(c)                             \
1915   do {                                                                  \
1916     /* Emacs 21 style format.  The first three bytes at SRC are         \
1917        (METHOD - 0xF2), (BYTES - 0xA0), (CHARS - 0xA0), where BYTES is  \
1918        the byte length of this composition information, CHARS is the    \
1919        number of characters composed by this composition.  */           \
1920     enum composition_method method = c - 0xF2;                          \
1921     int *charbuf_base = charbuf;                                        \
1922     int consumed_chars_limit;                                           \
1923     int nbytes, nchars;                                                 \
1924                                                                         \
1925     ONE_MORE_BYTE (c);                                                  \
1926     if (c < 0)                                                          \
1927       goto invalid_code;                                                \
1928     nbytes = c - 0xA0;                                                  \
1929     if (nbytes < 3)                                                     \
1930       goto invalid_code;                                                \
1931     ONE_MORE_BYTE (c);                                                  \
1932     if (c < 0)                                                          \
1933       goto invalid_code;                                                \
1934     nchars = c - 0xA0;                                                  \
1935     ADD_COMPOSITION_DATA (charbuf, nchars, method);                     \
1936     consumed_chars_limit = consumed_chars_base + nbytes;                \
1937     if (method != COMPOSITION_RELATIVE)                                 \
1938       {                                                                 \
1939         int i = 0;                                                      \
1940         while (consumed_chars < consumed_chars_limit)                   \
1941           {                                                             \
1942             if (i % 2 && method != COMPOSITION_WITH_ALTCHARS)           \
1943               DECODE_EMACS_MULE_COMPOSITION_RULE_21 (charbuf);          \
1944             else                                                        \
1945               DECODE_EMACS_MULE_COMPOSITION_CHAR (charbuf);             \
1946             i++;                                                        \
1947           }                                                             \
1948         if (consumed_chars < consumed_chars_limit)                      \
1949           goto invalid_code;                                            \
1950         charbuf_base[0] -= i;                                           \
1951       }                                                                 \
1952   } while (0)
1953
1954
1955 #define DECODE_EMACS_MULE_20_RELATIVE_COMPOSITION(c)            \
1956   do {                                                          \
1957     /* Emacs 20 style format for relative composition.  */      \
1958     /* Store multibyte form of characters to be composed.  */   \
1959     enum composition_method method = COMPOSITION_RELATIVE;      \
1960     int components[MAX_COMPOSITION_COMPONENTS * 2 - 1];         \
1961     int *buf = components;                                      \
1962     int i, j;                                                   \
1963                                                                 \
1964     src = src_base;                                             \
1965     ONE_MORE_BYTE (c);          /* skip 0x80 */                 \
1966     for (i = 0; i < MAX_COMPOSITION_COMPONENTS; i++)            \
1967       DECODE_EMACS_MULE_COMPOSITION_CHAR (buf);                 \
1968     if (i < 2)                                                  \
1969       goto invalid_code;                                        \
1970     ADD_COMPOSITION_DATA (charbuf, i, method);                  \
1971     for (j = 0; j < i; j++)                                     \
1972       *charbuf++ = components[j];                               \
1973   } while (0)
1974
1975
1976 #define DECODE_EMACS_MULE_20_RULEBASE_COMPOSITION(c)            \
1977   do {                                                          \
1978     /* Emacs 20 style format for rule-base composition.  */     \
1979     /* Store multibyte form of characters to be composed.  */   \
1980     enum composition_method method = COMPOSITION_WITH_RULE;     \
1981     int components[MAX_COMPOSITION_COMPONENTS * 2 - 1];         \
1982     int *buf = components;                                      \
1983     int i, j;                                                   \
1984                                                                 \
1985     DECODE_EMACS_MULE_COMPOSITION_CHAR (buf);                   \
1986     for (i = 0; i < MAX_COMPOSITION_COMPONENTS; i++)            \
1987       {                                                         \
1988         DECODE_EMACS_MULE_COMPOSITION_RULE_20 (buf);            \
1989         DECODE_EMACS_MULE_COMPOSITION_CHAR (buf);               \
1990       }                                                         \
1991     if (i < 1 || (buf - components) % 2 == 0)                   \
1992       goto invalid_code;                                        \
1993     if (charbuf + i + (i / 2) + 1 < charbuf_end)                \
1994       goto no_more_source;                                      \
1995     ADD_COMPOSITION_DATA (buf, i, method);                      \
1996     for (j = 0; j < i; j++)                                     \
1997       *charbuf++ = components[j];                               \
1998     for (j = 0; j < i; j += 2)                                  \
1999       *charbuf++ = components[j];                               \
2000   } while (0)
2001
2002
2003 static void
2004 decode_coding_emacs_mule (coding)
2005      struct coding_system *coding;
2006 {
2007   const unsigned char *src = coding->source + coding->consumed;
2008   const unsigned char *src_end = coding->source + coding->src_bytes;
2009   const unsigned char *src_base;
2010   int *charbuf = coding->charbuf + coding->charbuf_used;
2011   int *charbuf_end
2012     = coding->charbuf + coding->charbuf_size - MAX_ANNOTATION_LENGTH;
2013   int consumed_chars = 0, consumed_chars_base;
2014   int multibytep = coding->src_multibyte;
2015   Lisp_Object attrs, charset_list;
2016   int char_offset = coding->produced_char;
2017   int last_offset = char_offset;
2018   int last_id = charset_ascii;
2019
2020   CODING_GET_INFO (coding, attrs, charset_list);
2021
2022   while (1)
2023     {
2024       int c;
2025
2026       src_base = src;
2027       consumed_chars_base = consumed_chars;
2028
2029       if (charbuf >= charbuf_end)
2030         break;
2031
2032       ONE_MORE_BYTE (c);
2033       if (c < 0)
2034         {
2035           *charbuf++ = -c;
2036           char_offset++;
2037         }
2038       else if (c < 0x80)
2039         {
2040           *charbuf++ = c;
2041           char_offset++;
2042         }
2043       else if (c == 0x80)
2044         {
2045           ONE_MORE_BYTE (c);
2046           if (c < 0)
2047             goto invalid_code;
2048           if (c - 0xF2 >= COMPOSITION_RELATIVE
2049               && c - 0xF2 <= COMPOSITION_WITH_RULE_ALTCHARS)
2050             DECODE_EMACS_MULE_21_COMPOSITION (c);
2051           else if (c < 0xC0)
2052             DECODE_EMACS_MULE_20_RELATIVE_COMPOSITION (c);
2053           else if (c == 0xFF)
2054             DECODE_EMACS_MULE_20_RULEBASE_COMPOSITION (c);
2055           else
2056             goto invalid_code;
2057         }
2058       else if (c < 0xA0 && emacs_mule_bytes[c] > 1)
2059         {
2060           int nbytes, nchars;
2061           int id;
2062
2063           src = src_base;
2064           consumed_chars = consumed_chars_base;
2065           c = emacs_mule_char (coding, src, &nbytes, &nchars, &id);
2066           if (c < 0)
2067             {
2068               if (c == -2)
2069                 break;
2070               goto invalid_code;
2071             }
2072           if (last_id != id)
2073             {
2074               if (last_id != charset_ascii)
2075                 ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
2076               last_id = id;
2077               last_offset = char_offset;
2078             }
2079           *charbuf++ = c;
2080           src += nbytes;
2081           consumed_chars += nchars;
2082           char_offset++;
2083         }
2084       continue;
2085
2086     invalid_code:
2087       src = src_base;
2088       consumed_chars = consumed_chars_base;
2089       ONE_MORE_BYTE (c);
2090       *charbuf++ = ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
2091       char_offset++;
2092       coding->errors++;
2093     }
2094
2095  no_more_source:
2096   if (last_id != charset_ascii)
2097     ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
2098   coding->consumed_char += consumed_chars_base;
2099   coding->consumed = src_base - coding->source;
2100   coding->charbuf_used = charbuf - coding->charbuf;
2101 }
2102
2103
2104 #define EMACS_MULE_LEADING_CODES(id, codes)     \
2105   do {                                          \
2106     if (id < 0xA0)                              \
2107       codes[0] = id, codes[1] = 0;              \
2108     else if (id < 0xE0)                         \
2109       codes[0] = 0x9A, codes[1] = id;           \
2110     else if (id < 0xF0)                         \
2111       codes[0] = 0x9B, codes[1] = id;           \
2112     else if (id < 0xF5)                         \
2113       codes[0] = 0x9C, codes[1] = id;           \
2114     else                                        \
2115       codes[0] = 0x9D, codes[1] = id;           \
2116   } while (0);
2117
2118
2119 static int
2120 encode_coding_emacs_mule (coding)
2121      struct coding_system *coding;
2122 {
2123   int multibytep = coding->dst_multibyte;
2124   int *charbuf = coding->charbuf;
2125   int *charbuf_end = charbuf + coding->charbuf_used;
2126   unsigned char *dst = coding->destination + coding->produced;
2127   unsigned char *dst_end = coding->destination + coding->dst_bytes;
2128   int safe_room = 8;
2129   int produced_chars = 0;
2130   Lisp_Object attrs, charset_list;
2131   int c;
2132   int preferred_charset_id = -1;
2133
2134   CODING_GET_INFO (coding, attrs, charset_list);
2135   if (! EQ (charset_list, Vemacs_mule_charset_list))
2136     {
2137       CODING_ATTR_CHARSET_LIST (attrs)
2138         = charset_list = Vemacs_mule_charset_list;
2139     }
2140
2141   while (charbuf < charbuf_end)
2142     {
2143       ASSURE_DESTINATION (safe_room);
2144       c = *charbuf++;
2145
2146       if (c < 0)
2147         {
2148           /* Handle an annotation.  */
2149           switch (*charbuf)
2150             {
2151             case CODING_ANNOTATE_COMPOSITION_MASK:
2152               /* Not yet implemented.  */
2153               break;
2154             case CODING_ANNOTATE_CHARSET_MASK:
2155               preferred_charset_id = charbuf[3];
2156               if (preferred_charset_id >= 0
2157                   && NILP (Fmemq (make_number (preferred_charset_id),
2158                                   charset_list)))
2159                 preferred_charset_id = -1;
2160               break;
2161             default:
2162               abort ();
2163             }
2164           charbuf += -c - 1;
2165           continue;
2166         }
2167
2168       if (ASCII_CHAR_P (c))
2169         EMIT_ONE_ASCII_BYTE (c);
2170       else if (CHAR_BYTE8_P (c))
2171         {
2172           c = CHAR_TO_BYTE8 (c);
2173           EMIT_ONE_BYTE (c);
2174         }
2175       else
2176         {
2177           struct charset *charset;
2178           unsigned code;
2179           int dimension;
2180           int emacs_mule_id;
2181           unsigned char leading_codes[2];
2182
2183           if (preferred_charset_id >= 0)
2184             {
2185               charset = CHARSET_FROM_ID (preferred_charset_id);
2186               if (! CHAR_CHARSET_P (c, charset))
2187                 charset = char_charset (c, charset_list, NULL);
2188             }
2189           else
2190             charset = char_charset (c, charset_list, &code);
2191           if (! charset)
2192             {
2193               c = coding->default_char;
2194               if (ASCII_CHAR_P (c))
2195                 {
2196                   EMIT_ONE_ASCII_BYTE (c);
2197                   continue;
2198                 }
2199               charset = char_charset (c, charset_list, &code);
2200             }
2201           dimension = CHARSET_DIMENSION (charset);
2202           emacs_mule_id = CHARSET_EMACS_MULE_ID (charset);
2203           EMACS_MULE_LEADING_CODES (emacs_mule_id, leading_codes);
2204           EMIT_ONE_BYTE (leading_codes[0]);
2205           if (leading_codes[1])
2206             EMIT_ONE_BYTE (leading_codes[1]);
2207           if (dimension == 1)
2208             EMIT_ONE_BYTE (code | 0x80);
2209           else
2210             {
2211               code |= 0x8080;
2212               EMIT_ONE_BYTE (code >> 8);
2213               EMIT_ONE_BYTE (code & 0xFF);
2214             }
2215         }
2216     }
2217   record_conversion_result (coding, CODING_RESULT_SUCCESS);
2218   coding->produced_char += produced_chars;
2219   coding->produced = dst - coding->destination;
2220   return 0;
2221 }
2222
2223 \f
2224 /*** 7. ISO2022 handlers ***/
2225
2226 /* The following note describes the coding system ISO2022 briefly.
2227    Since the intention of this note is to help understand the
2228    functions in this file, some parts are NOT ACCURATE or are OVERLY
2229    SIMPLIFIED.  For thorough understanding, please refer to the
2230    original document of ISO2022.  This is equivalent to the standard
2231    ECMA-35, obtainable from <URL:http://www.ecma.ch/> (*).
2232
2233    ISO2022 provides many mechanisms to encode several character sets
2234    in 7-bit and 8-bit environments.  For 7-bit environments, all text
2235    is encoded using bytes less than 128.  This may make the encoded
2236    text a little bit longer, but the text passes more easily through
2237    several types of gateway, some of which strip off the MSB (Most
2238    Significant Bit).
2239
2240    There are two kinds of character sets: control character sets and
2241    graphic character sets.  The former contain control characters such
2242    as `newline' and `escape' to provide control functions (control
2243    functions are also provided by escape sequences).  The latter
2244    contain graphic characters such as 'A' and '-'.  Emacs recognizes
2245    two control character sets and many graphic character sets.
2246
2247    Graphic character sets are classified into one of the following
2248    four classes, according to the number of bytes (DIMENSION) and
2249    number of characters in one dimension (CHARS) of the set:
2250    - DIMENSION1_CHARS94
2251    - DIMENSION1_CHARS96
2252    - DIMENSION2_CHARS94
2253    - DIMENSION2_CHARS96
2254
2255    In addition, each character set is assigned an identification tag,
2256    unique for each set, called the "final character" (denoted as <F>
2257    hereafter).  The <F> of each character set is decided by ECMA(*)
2258    when it is registered in ISO.  The code range of <F> is 0x30..0x7F
2259    (0x30..0x3F are for private use only).
2260
2261    Note (*): ECMA = European Computer Manufacturers Association
2262
2263    Here are examples of graphic character sets [NAME(<F>)]:
2264         o DIMENSION1_CHARS94 -- ASCII('B'), right-half-of-JISX0201('I'), ...
2265         o DIMENSION1_CHARS96 -- right-half-of-ISO8859-1('A'), ...
2266         o DIMENSION2_CHARS94 -- GB2312('A'), JISX0208('B'), ...
2267         o DIMENSION2_CHARS96 -- none for the moment
2268
2269    A code area (1 byte=8 bits) is divided into 4 areas, C0, GL, C1, and GR.
2270         C0 [0x00..0x1F] -- control character plane 0
2271         GL [0x20..0x7F] -- graphic character plane 0
2272         C1 [0x80..0x9F] -- control character plane 1
2273         GR [0xA0..0xFF] -- graphic character plane 1
2274
2275    A control character set is directly designated and invoked to C0 or
2276    C1 by an escape sequence.  The most common case is that:
2277    - ISO646's  control character set is designated/invoked to C0, and
2278    - ISO6429's control character set is designated/invoked to C1,
2279    and usually these designations/invocations are omitted in encoded
2280    text.  In a 7-bit environment, only C0 can be used, and a control
2281    character for C1 is encoded by an appropriate escape sequence to
2282    fit into the environment.  All control characters for C1 are
2283    defined to have corresponding escape sequences.
2284
2285    A graphic character set is at first designated to one of four
2286    graphic registers (G0 through G3), then these graphic registers are
2287    invoked to GL or GR.  These designations and invocations can be
2288    done independently.  The most common case is that G0 is invoked to
2289    GL, G1 is invoked to GR, and ASCII is designated to G0.  Usually
2290    these invocations and designations are omitted in encoded text.
2291    In a 7-bit environment, only GL can be used.
2292
2293    When a graphic character set of CHARS94 is invoked to GL, codes
2294    0x20 and 0x7F of the GL area work as control characters SPACE and
2295    DEL respectively, and codes 0xA0 and 0xFF of the GR area should not
2296    be used.
2297
2298    There are two ways of invocation: locking-shift and single-shift.
2299    With locking-shift, the invocation lasts until the next different
2300    invocation, whereas with single-shift, the invocation affects the
2301    following character only and doesn't affect the locking-shift
2302    state.  Invocations are done by the following control characters or
2303    escape sequences:
2304
2305    ----------------------------------------------------------------------
2306    abbrev  function                  cntrl escape seq   description
2307    ----------------------------------------------------------------------
2308    SI/LS0  (shift-in)                0x0F  none         invoke G0 into GL
2309    SO/LS1  (shift-out)               0x0E  none         invoke G1 into GL
2310    LS2     (locking-shift-2)         none  ESC 'n'      invoke G2 into GL
2311    LS3     (locking-shift-3)         none  ESC 'o'      invoke G3 into GL
2312    LS1R    (locking-shift-1 right)   none  ESC '~'      invoke G1 into GR (*)
2313    LS2R    (locking-shift-2 right)   none  ESC '}'      invoke G2 into GR (*)
2314    LS3R    (locking-shift 3 right)   none  ESC '|'      invoke G3 into GR (*)
2315    SS2     (single-shift-2)          0x8E  ESC 'N'      invoke G2 for one char
2316    SS3     (single-shift-3)          0x8F  ESC 'O'      invoke G3 for one char
2317    ----------------------------------------------------------------------
2318    (*) These are not used by any known coding system.
2319
2320    Control characters for these functions are defined by macros
2321    ISO_CODE_XXX in `coding.h'.
2322
2323    Designations are done by the following escape sequences:
2324    ----------------------------------------------------------------------
2325    escape sequence      description
2326    ----------------------------------------------------------------------
2327    ESC '(' <F>          designate DIMENSION1_CHARS94<F> to G0
2328    ESC ')' <F>          designate DIMENSION1_CHARS94<F> to G1
2329    ESC '*' <F>          designate DIMENSION1_CHARS94<F> to G2
2330    ESC '+' <F>          designate DIMENSION1_CHARS94<F> to G3
2331    ESC ',' <F>          designate DIMENSION1_CHARS96<F> to G0 (*)
2332    ESC '-' <F>          designate DIMENSION1_CHARS96<F> to G1
2333    ESC '.' <F>          designate DIMENSION1_CHARS96<F> to G2
2334    ESC '/' <F>          designate DIMENSION1_CHARS96<F> to G3
2335    ESC '$' '(' <F>      designate DIMENSION2_CHARS94<F> to G0 (**)
2336    ESC '$' ')' <F>      designate DIMENSION2_CHARS94<F> to G1
2337    ESC '$' '*' <F>      designate DIMENSION2_CHARS94<F> to G2
2338    ESC '$' '+' <F>      designate DIMENSION2_CHARS94<F> to G3
2339    ESC '$' ',' <F>      designate DIMENSION2_CHARS96<F> to G0 (*)
2340    ESC '$' '-' <F>      designate DIMENSION2_CHARS96<F> to G1
2341    ESC '$' '.' <F>      designate DIMENSION2_CHARS96<F> to G2
2342    ESC '$' '/' <F>      designate DIMENSION2_CHARS96<F> to G3
2343    ----------------------------------------------------------------------
2344
2345    In this list, "DIMENSION1_CHARS94<F>" means a graphic character set
2346    of dimension 1, chars 94, and final character <F>, etc...
2347
2348    Note (*): Although these designations are not allowed in ISO2022,
2349    Emacs accepts them on decoding, and produces them on encoding
2350    CHARS96 character sets in a coding system which is characterized as
2351    7-bit environment, non-locking-shift, and non-single-shift.
2352
2353    Note (**): If <F> is '@', 'A', or 'B', the intermediate character
2354    '(' must be omitted.  We refer to this as "short-form" hereafter.
2355
2356    Now you may notice that there are a lot of ways of encoding the
2357    same multilingual text in ISO2022.  Actually, there exist many
2358    coding systems such as Compound Text (used in X11's inter client
2359    communication, ISO-2022-JP (used in Japanese Internet), ISO-2022-KR
2360    (used in Korean Internet), EUC (Extended UNIX Code, used in Asian
2361    localized platforms), and all of these are variants of ISO2022.
2362
2363    In addition to the above, Emacs handles two more kinds of escape
2364    sequences: ISO6429's direction specification and Emacs' private
2365    sequence for specifying character composition.
2366
2367    ISO6429's direction specification takes the following form:
2368         o CSI ']'      -- end of the current direction
2369         o CSI '0' ']'  -- end of the current direction
2370         o CSI '1' ']'  -- start of left-to-right text
2371         o CSI '2' ']'  -- start of right-to-left text
2372    The control character CSI (0x9B: control sequence introducer) is
2373    abbreviated to the escape sequence ESC '[' in a 7-bit environment.
2374
2375    Character composition specification takes the following form:
2376         o ESC '0' -- start relative composition
2377         o ESC '1' -- end composition
2378         o ESC '2' -- start rule-base composition (*)
2379         o ESC '3' -- start relative composition with alternate chars  (**)
2380         o ESC '4' -- start rule-base composition with alternate chars  (**)
2381   Since these are not standard escape sequences of any ISO standard,
2382   the use of them with these meanings is restricted to Emacs only.
2383
2384   (*) This form is used only in Emacs 20.7 and older versions,
2385   but newer versions can safely decode it.
2386   (**) This form is used only in Emacs 21.1 and newer versions,
2387   and older versions can't decode it.
2388
2389   Here's a list of example usages of these composition escape
2390   sequences (categorized by `enum composition_method').
2391
2392   COMPOSITION_RELATIVE:
2393         ESC 0 CHAR [ CHAR ] ESC 1
2394   COMPOSITION_WITH_RULE:
2395         ESC 2 CHAR [ RULE CHAR ] ESC 1
2396   COMPOSITION_WITH_ALTCHARS:
2397         ESC 3 ALTCHAR [ ALTCHAR ] ESC 0 CHAR [ CHAR ] ESC 1
2398   COMPOSITION_WITH_RULE_ALTCHARS:
2399         ESC 4 ALTCHAR [ RULE ALTCHAR ] ESC 0 CHAR [ CHAR ] ESC 1 */
2400
2401 enum iso_code_class_type iso_code_class[256];
2402
2403 #define SAFE_CHARSET_P(coding, id)      \
2404   ((id) <= (coding)->max_charset_id     \
2405    && (coding)->safe_charsets[id] >= 0)
2406
2407
2408 #define SHIFT_OUT_OK(category)  \
2409   (CODING_ISO_INITIAL (&coding_categories[category], 1) >= 0)
2410
2411 static void
2412 setup_iso_safe_charsets (attrs)
2413      Lisp_Object attrs;
2414 {
2415   Lisp_Object charset_list, safe_charsets;
2416   Lisp_Object request;
2417   Lisp_Object reg_usage;
2418   Lisp_Object tail;
2419   int reg94, reg96;
2420   int flags = XINT (AREF (attrs, coding_attr_iso_flags));
2421   int max_charset_id;
2422
2423   charset_list = CODING_ATTR_CHARSET_LIST (attrs);
2424   if ((flags & CODING_ISO_FLAG_FULL_SUPPORT)
2425       && ! EQ (charset_list, Viso_2022_charset_list))
2426     {
2427       CODING_ATTR_CHARSET_LIST (attrs)
2428         = charset_list = Viso_2022_charset_list;
2429       ASET (attrs, coding_attr_safe_charsets, Qnil);
2430     }
2431
2432   if (STRINGP (AREF (attrs, coding_attr_safe_charsets)))
2433     return;
2434
2435   max_charset_id = 0;
2436   for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
2437     {
2438       int id = XINT (XCAR (tail));
2439       if (max_charset_id < id)
2440         max_charset_id = id;
2441     }
2442
2443   safe_charsets = Fmake_string (make_number (max_charset_id + 1),
2444                                 make_number (255));
2445   request = AREF (attrs, coding_attr_iso_request);
2446   reg_usage = AREF (attrs, coding_attr_iso_usage);
2447   reg94 = XINT (XCAR (reg_usage));
2448   reg96 = XINT (XCDR (reg_usage));
2449
2450   for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
2451     {
2452       Lisp_Object id;
2453       Lisp_Object reg;
2454       struct charset *charset;
2455
2456       id = XCAR (tail);
2457       charset = CHARSET_FROM_ID (XINT (id));
2458       reg = Fcdr (Fassq (id, request));
2459       if (! NILP (reg))
2460         SSET (safe_charsets, XINT (id), XINT (reg));
2461       else if (charset->iso_chars_96)
2462         {
2463           if (reg96 < 4)
2464             SSET (safe_charsets, XINT (id), reg96);
2465         }
2466       else
2467         {
2468           if (reg94 < 4)
2469             SSET (safe_charsets, XINT (id), reg94);
2470         }
2471     }
2472   ASET (attrs, coding_attr_safe_charsets, safe_charsets);
2473 }
2474
2475
2476 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
2477    Check if a text is encoded in one of ISO-2022 based codig systems.
2478    If it is, return 1, else return 0.  */
2479
2480 static int
2481 detect_coding_iso_2022 (coding, detect_info)
2482      struct coding_system *coding;
2483      struct coding_detection_info *detect_info;
2484 {
2485   const unsigned char *src = coding->source, *src_base = src;
2486   const unsigned char *src_end = coding->source + coding->src_bytes;
2487   int multibytep = coding->src_multibyte;
2488   int single_shifting = 0;
2489   int id;
2490   int c, c1;
2491   int consumed_chars = 0;
2492   int i;
2493   int rejected = 0;
2494   int found = 0;
2495
2496   detect_info->checked |= CATEGORY_MASK_ISO;
2497
2498   for (i = coding_category_iso_7; i <= coding_category_iso_8_else; i++)
2499     {
2500       struct coding_system *this = &(coding_categories[i]);
2501       Lisp_Object attrs, val;
2502
2503       attrs = CODING_ID_ATTRS (this->id);
2504       if (CODING_ISO_FLAGS (this) & CODING_ISO_FLAG_FULL_SUPPORT
2505           && ! EQ (CODING_ATTR_SAFE_CHARSETS (attrs), Viso_2022_charset_list))
2506         setup_iso_safe_charsets (attrs);
2507       val = CODING_ATTR_SAFE_CHARSETS (attrs);
2508       this->max_charset_id = SCHARS (val) - 1;
2509       this->safe_charsets = (char *) SDATA (val);
2510     }
2511
2512   /* A coding system of this category is always ASCII compatible.  */
2513   src += coding->head_ascii;
2514
2515   while (rejected != CATEGORY_MASK_ISO)
2516     {
2517       src_base = src;
2518       ONE_MORE_BYTE (c);
2519       switch (c)
2520         {
2521         case ISO_CODE_ESC:
2522           if (inhibit_iso_escape_detection)
2523             break;
2524           single_shifting = 0;
2525           ONE_MORE_BYTE (c);
2526           if (c >= '(' && c <= '/')
2527             {
2528               /* Designation sequence for a charset of dimension 1.  */
2529               ONE_MORE_BYTE (c1);
2530               if (c1 < ' ' || c1 >= 0x80
2531                   || (id = iso_charset_table[0][c >= ','][c1]) < 0)
2532                 /* Invalid designation sequence.  Just ignore.  */
2533                 break;
2534             }
2535           else if (c == '$')
2536             {
2537               /* Designation sequence for a charset of dimension 2.  */
2538               ONE_MORE_BYTE (c);
2539               if (c >= '@' && c <= 'B')
2540                 /* Designation for JISX0208.1978, GB2312, or JISX0208.  */
2541                 id = iso_charset_table[1][0][c];
2542               else if (c >= '(' && c <= '/')
2543                 {
2544                   ONE_MORE_BYTE (c1);
2545                   if (c1 < ' ' || c1 >= 0x80
2546                       || (id = iso_charset_table[1][c >= ','][c1]) < 0)
2547                     /* Invalid designation sequence.  Just ignore.  */
2548                     break;
2549                 }
2550               else
2551                 /* Invalid designation sequence.  Just ignore it.  */
2552                 break;
2553             }
2554           else if (c == 'N' || c == 'O')
2555             {
2556               /* ESC <Fe> for SS2 or SS3.  */
2557               single_shifting = 1;
2558               rejected |= CATEGORY_MASK_ISO_7BIT | CATEGORY_MASK_ISO_8BIT;
2559               break;
2560             }
2561           else if (c >= '0' && c <= '4')
2562             {
2563               /* ESC <Fp> for start/end composition.  */
2564               found |= CATEGORY_MASK_ISO;
2565               break;
2566             }
2567           else
2568             {
2569               /* Invalid escape sequence.  Just ignore it.  */
2570               break;
2571             }
2572
2573           /* We found a valid designation sequence for CHARSET.  */
2574           rejected |= CATEGORY_MASK_ISO_8BIT;
2575           if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_7],
2576                               id))
2577             found |= CATEGORY_MASK_ISO_7;
2578           else
2579             rejected |= CATEGORY_MASK_ISO_7;
2580           if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_7_tight],
2581                               id))
2582             found |= CATEGORY_MASK_ISO_7_TIGHT;
2583           else
2584             rejected |= CATEGORY_MASK_ISO_7_TIGHT;
2585           if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_7_else],
2586                               id))
2587             found |= CATEGORY_MASK_ISO_7_ELSE;
2588           else
2589             rejected |= CATEGORY_MASK_ISO_7_ELSE;
2590           if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_8_else],
2591                               id))
2592             found |= CATEGORY_MASK_ISO_8_ELSE;
2593           else
2594             rejected |= CATEGORY_MASK_ISO_8_ELSE;
2595           break;
2596
2597         case ISO_CODE_SO:
2598         case ISO_CODE_SI:
2599           /* Locking shift out/in.  */
2600           if (inhibit_iso_escape_detection)
2601             break;
2602           single_shifting = 0;
2603           rejected |= CATEGORY_MASK_ISO_7BIT | CATEGORY_MASK_ISO_8BIT;
2604           found |= CATEGORY_MASK_ISO_ELSE;
2605           break;
2606
2607         case ISO_CODE_CSI:
2608           /* Control sequence introducer.  */
2609           single_shifting = 0;
2610           rejected |= CATEGORY_MASK_ISO_7BIT | CATEGORY_MASK_ISO_7_ELSE;
2611           found |= CATEGORY_MASK_ISO_8_ELSE;
2612           goto check_extra_latin;
2613
2614         case ISO_CODE_SS2:
2615         case ISO_CODE_SS3:
2616           /* Single shift.   */
2617           if (inhibit_iso_escape_detection)
2618             break;
2619           single_shifting = 0;
2620           rejected |= CATEGORY_MASK_ISO_7BIT;
2621           if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_1])
2622               & CODING_ISO_FLAG_SINGLE_SHIFT)
2623             found |= CATEGORY_MASK_ISO_8_1, single_shifting = 1;
2624           if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_2])
2625               & CODING_ISO_FLAG_SINGLE_SHIFT)
2626             found |= CATEGORY_MASK_ISO_8_2, single_shifting = 1;
2627           if (single_shifting)
2628             break;
2629           goto check_extra_latin;
2630
2631         default:
2632           if (c < 0)
2633             continue;
2634           if (c < 0x80)
2635             {
2636               single_shifting = 0;
2637               break;
2638             }
2639           if (c >= 0xA0)
2640             {
2641               rejected |= CATEGORY_MASK_ISO_7BIT | CATEGORY_MASK_ISO_7_ELSE;
2642               found |= CATEGORY_MASK_ISO_8_1;
2643               /* Check the length of succeeding codes of the range
2644                  0xA0..0FF.  If the byte length is even, we include
2645                  CATEGORY_MASK_ISO_8_2 in `found'.  We can check this
2646                  only when we are not single shifting.  */
2647               if (! single_shifting
2648                   && ! (rejected & CATEGORY_MASK_ISO_8_2))
2649                 {
2650                   int i = 1;
2651                   while (src < src_end)
2652                     {
2653                       ONE_MORE_BYTE (c);
2654                       if (c < 0xA0)
2655                         break;
2656                       i++;
2657                     }
2658
2659                   if (i & 1 && src < src_end)
2660                     rejected |= CATEGORY_MASK_ISO_8_2;
2661                   else
2662                     found |= CATEGORY_MASK_ISO_8_2;
2663                 }
2664               break;
2665             }
2666         check_extra_latin:
2667           single_shifting = 0;
2668           if (! VECTORP (Vlatin_extra_code_table)
2669               || NILP (XVECTOR (Vlatin_extra_code_table)->contents[c]))
2670             {
2671               rejected = CATEGORY_MASK_ISO;
2672               break;
2673             }
2674           if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_1])
2675               & CODING_ISO_FLAG_LATIN_EXTRA)
2676             found |= CATEGORY_MASK_ISO_8_1;
2677           else
2678             rejected |= CATEGORY_MASK_ISO_8_1;
2679           rejected |= CATEGORY_MASK_ISO_8_2;
2680         }
2681     }
2682   detect_info->rejected |= CATEGORY_MASK_ISO;
2683   return 0;
2684
2685  no_more_source:
2686   detect_info->rejected |= rejected;
2687   detect_info->found |= (found & ~rejected);
2688   return 1;
2689 }
2690
2691
2692 /* Set designation state into CODING.  */
2693 #define DECODE_DESIGNATION(reg, dim, chars_96, final)                   \
2694   do {                                                                  \
2695     int id, prev;                                                       \
2696                                                                         \
2697     if (final < '0' || final >= 128                                     \
2698         || ((id = ISO_CHARSET_TABLE (dim, chars_96, final)) < 0)        \
2699         || !SAFE_CHARSET_P (coding, id))                                \
2700       {                                                                 \
2701         CODING_ISO_DESIGNATION (coding, reg) = -2;                      \
2702         goto invalid_code;                                              \
2703       }                                                                 \
2704     prev = CODING_ISO_DESIGNATION (coding, reg);                        \
2705     if (id == charset_jisx0201_roman)                                   \
2706       {                                                                 \
2707         if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_USE_ROMAN)      \
2708           id = charset_ascii;                                           \
2709       }                                                                 \
2710     else if (id == charset_jisx0208_1978)                               \
2711       {                                                                 \
2712         if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_USE_OLDJIS)     \
2713           id = charset_jisx0208;                                        \
2714       }                                                                 \
2715     CODING_ISO_DESIGNATION (coding, reg) = id;                          \
2716     /* If there was an invalid designation to REG previously, and this  \
2717        designation is ASCII to REG, we should keep this designation     \
2718        sequence.  */                                                    \
2719     if (prev == -2 && id == charset_ascii)                              \
2720       goto invalid_code;                                                \
2721   } while (0)
2722
2723
2724 #define MAYBE_FINISH_COMPOSITION()                              \
2725   do {                                                          \
2726     int i;                                                      \
2727     if (composition_state == COMPOSING_NO)                      \
2728       break;                                                    \
2729     /* It is assured that we have enough room for producing     \
2730        characters stored in the table `components'.  */         \
2731     if (charbuf + component_idx > charbuf_end)                  \
2732       goto no_more_source;                                      \
2733     composition_state = COMPOSING_NO;                           \
2734     if (method == COMPOSITION_RELATIVE                          \
2735         || method == COMPOSITION_WITH_ALTCHARS)                 \
2736       {                                                         \
2737         for (i = 0; i < component_idx; i++)                     \
2738           *charbuf++ = components[i];                           \
2739         char_offset += component_idx;                           \
2740       }                                                         \
2741     else                                                        \
2742       {                                                         \
2743         for (i = 0; i < component_idx; i += 2)                  \
2744           *charbuf++ = components[i];                           \
2745         char_offset += (component_idx / 2) + 1;                 \
2746       }                                                         \
2747   } while (0)
2748
2749
2750 /* Handle composition start sequence ESC 0, ESC 2, ESC 3, or ESC 4.
2751    ESC 0 : relative composition : ESC 0 CHAR ... ESC 1
2752    ESC 2 : rulebase composition : ESC 2 CHAR RULE CHAR RULE ... CHAR ESC 1
2753    ESC 3 : altchar composition :  ESC 3 CHAR ... ESC 0 CHAR ... ESC 1
2754    ESC 4 : alt&rule composition : ESC 4 CHAR RULE ... CHAR ESC 0 CHAR ... ESC 1
2755   */
2756
2757 #define DECODE_COMPOSITION_START(c1)                                    \
2758   do {                                                                  \
2759     if (c1 == '0'                                                       \
2760         && composition_state == COMPOSING_COMPONENT_RULE)               \
2761       {                                                                 \
2762         component_len = component_idx;                                  \
2763         composition_state = COMPOSING_CHAR;                             \
2764       }                                                                 \
2765     else                                                                \
2766       {                                                                 \
2767         const unsigned char *p;                                         \
2768                                                                         \
2769         MAYBE_FINISH_COMPOSITION ();                                    \
2770         if (charbuf + MAX_COMPOSITION_COMPONENTS > charbuf_end)         \
2771           goto no_more_source;                                          \
2772         for (p = src; p < src_end - 1; p++)                             \
2773           if (*p == ISO_CODE_ESC && p[1] == '1')                        \
2774             break;                                                      \
2775         if (p == src_end - 1)                                           \
2776           {                                                             \
2777             if (coding->mode & CODING_MODE_LAST_BLOCK)                  \
2778               goto invalid_code;                                        \
2779             goto no_more_source;                                        \
2780           }                                                             \
2781                                                                         \
2782         /* This is surely the start of a composition.  */               \
2783         method = (c1 == '0' ? COMPOSITION_RELATIVE                      \
2784                   : c1 == '2' ? COMPOSITION_WITH_RULE                   \
2785                   : c1 == '3' ? COMPOSITION_WITH_ALTCHARS               \
2786                   : COMPOSITION_WITH_RULE_ALTCHARS);                    \
2787         composition_state = (c1 <= '2' ? COMPOSING_CHAR                 \
2788                              : COMPOSING_COMPONENT_CHAR);               \
2789         component_idx = component_len = 0;                              \
2790       }                                                                 \
2791   } while (0)
2792
2793
2794 /* Handle compositoin end sequence ESC 1.  */
2795
2796 #define DECODE_COMPOSITION_END()                                        \
2797   do {                                                                  \
2798     int nchars = (component_len > 0 ? component_idx - component_len     \
2799                   : method == COMPOSITION_RELATIVE ? component_idx      \
2800                   : (component_idx + 1) / 2);                           \
2801     int i;                                                              \
2802     int *saved_charbuf = charbuf;                                       \
2803                                                                         \
2804     ADD_COMPOSITION_DATA (charbuf, nchars, method);                     \
2805     if (method != COMPOSITION_RELATIVE)                                 \
2806       {                                                                 \
2807         if (component_len == 0)                                         \
2808           for (i = 0; i < component_idx; i++)                           \
2809             *charbuf++ = components[i];                                 \
2810         else                                                            \
2811           for (i = 0; i < component_len; i++)                           \
2812             *charbuf++ = components[i];                                 \
2813         *saved_charbuf = saved_charbuf - charbuf;                       \
2814       }                                                                 \
2815     if (method == COMPOSITION_WITH_RULE)                                \
2816       for (i = 0; i < component_idx; i += 2, char_offset++)             \
2817         *charbuf++ = components[i];                                     \
2818     else                                                                \
2819       for (i = component_len; i < component_idx; i++, char_offset++)    \
2820         *charbuf++ = components[i];                                     \
2821     coding->annotated = 1;                                              \
2822     composition_state = COMPOSING_NO;                                   \
2823   } while (0)
2824
2825
2826 /* Decode a composition rule from the byte C1 (and maybe one more byte
2827    from SRC) and store one encoded composition rule in
2828    coding->cmp_data.  */
2829
2830 #define DECODE_COMPOSITION_RULE(c1)                                     \
2831   do {                                                                  \
2832     (c1) -= 32;                                                         \
2833     if (c1 < 81)                /* old format (before ver.21) */        \
2834       {                                                                 \
2835         int gref = (c1) / 9;                                            \
2836         int nref = (c1) % 9;                                            \
2837         if (gref == 4) gref = 10;                                       \
2838         if (nref == 4) nref = 10;                                       \
2839         c1 = COMPOSITION_ENCODE_RULE (gref, nref);                      \
2840       }                                                                 \
2841     else if (c1 < 93)           /* new format (after ver.21) */         \
2842       {                                                                 \
2843         ONE_MORE_BYTE (c2);                                             \
2844         c1 = COMPOSITION_ENCODE_RULE (c1 - 81, c2 - 32);                \
2845       }                                                                 \
2846     else                                                                \
2847       c1 = 0;                                                           \
2848   } while (0)
2849
2850
2851 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".  */
2852
2853 static void
2854 decode_coding_iso_2022 (coding)
2855      struct coding_system *coding;
2856 {
2857   const unsigned char *src = coding->source + coding->consumed;
2858   const unsigned char *src_end = coding->source + coding->src_bytes;
2859   const unsigned char *src_base;
2860   int *charbuf = coding->charbuf + coding->charbuf_used;
2861   int *charbuf_end
2862     = coding->charbuf + coding->charbuf_size - 4 - MAX_ANNOTATION_LENGTH;
2863   int consumed_chars = 0, consumed_chars_base;
2864   int multibytep = coding->src_multibyte;
2865   /* Charsets invoked to graphic plane 0 and 1 respectively.  */
2866   int charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
2867   int charset_id_1 = CODING_ISO_INVOKED_CHARSET (coding, 1);
2868   struct charset *charset;
2869   int c;
2870   /* For handling composition sequence.  */
2871 #define COMPOSING_NO                    0
2872 #define COMPOSING_CHAR                  1
2873 #define COMPOSING_RULE                  2
2874 #define COMPOSING_COMPONENT_CHAR        3
2875 #define COMPOSING_COMPONENT_RULE        4
2876
2877   int composition_state = COMPOSING_NO;
2878   enum composition_method method;
2879   int components[MAX_COMPOSITION_COMPONENTS * 2 + 1];
2880   int component_idx;
2881   int component_len;
2882   Lisp_Object attrs, charset_list;
2883   int char_offset = coding->produced_char;
2884   int last_offset = char_offset;
2885   int last_id = charset_ascii;
2886
2887   CODING_GET_INFO (coding, attrs, charset_list);
2888   setup_iso_safe_charsets (attrs);
2889
2890   while (1)
2891     {
2892       int c1, c2;
2893
2894       src_base = src;
2895       consumed_chars_base = consumed_chars;
2896
2897       if (charbuf >= charbuf_end)
2898         break;
2899
2900       ONE_MORE_BYTE (c1);
2901       if (c1 < 0)
2902         goto invalid_code;
2903
2904       /* We produce at most one character.  */
2905       switch (iso_code_class [c1])
2906         {
2907         case ISO_0x20_or_0x7F:
2908           if (composition_state != COMPOSING_NO)
2909             {
2910               if (composition_state == COMPOSING_RULE
2911                   || composition_state == COMPOSING_COMPONENT_RULE)
2912                 {
2913                   DECODE_COMPOSITION_RULE (c1);
2914                   components[component_idx++] = c1;
2915                   composition_state--;
2916                   continue;
2917                 }
2918             }
2919           if (charset_id_0 < 0
2920               || ! CHARSET_ISO_CHARS_96 (CHARSET_FROM_ID (charset_id_0)))
2921             /* This is SPACE or DEL.  */
2922             charset = CHARSET_FROM_ID (charset_ascii);
2923           else
2924             charset = CHARSET_FROM_ID (charset_id_0);
2925           break;
2926
2927         case ISO_graphic_plane_0:
2928           if (composition_state != COMPOSING_NO)
2929             {
2930               if (composition_state == COMPOSING_RULE
2931                   || composition_state == COMPOSING_COMPONENT_RULE)
2932                 {
2933                   DECODE_COMPOSITION_RULE (c1);
2934                   components[component_idx++] = c1;
2935                   composition_state--;
2936                   continue;
2937                 }
2938             }
2939           charset = CHARSET_FROM_ID (charset_id_0);
2940           break;
2941
2942         case ISO_0xA0_or_0xFF:
2943           if (charset_id_1 < 0
2944               || ! CHARSET_ISO_CHARS_96 (CHARSET_FROM_ID (charset_id_1))
2945               || CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS)
2946             goto invalid_code;
2947           /* This is a graphic character, we fall down ... */
2948
2949         case ISO_graphic_plane_1:
2950           if (charset_id_1 < 0)
2951             goto invalid_code;
2952           charset = CHARSET_FROM_ID (charset_id_1);
2953           break;
2954
2955         case ISO_control_0:
2956           MAYBE_FINISH_COMPOSITION ();
2957           charset = CHARSET_FROM_ID (charset_ascii);
2958           break;
2959
2960         case ISO_control_1:
2961           MAYBE_FINISH_COMPOSITION ();
2962           goto invalid_code;
2963
2964         case ISO_shift_out:
2965           if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LOCKING_SHIFT)
2966               || CODING_ISO_DESIGNATION (coding, 1) < 0)
2967             goto invalid_code;
2968           CODING_ISO_INVOCATION (coding, 0) = 1;
2969           charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
2970           continue;
2971
2972         case ISO_shift_in:
2973           if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LOCKING_SHIFT))
2974             goto invalid_code;
2975           CODING_ISO_INVOCATION (coding, 0) = 0;
2976           charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
2977           continue;
2978
2979         case ISO_single_shift_2_7:
2980         case ISO_single_shift_2:
2981           if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT))
2982             goto invalid_code;
2983           /* SS2 is handled as an escape sequence of ESC 'N' */
2984           c1 = 'N';
2985           goto label_escape_sequence;
2986
2987         case ISO_single_shift_3:
2988           if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT))
2989             goto invalid_code;
2990           /* SS2 is handled as an escape sequence of ESC 'O' */
2991           c1 = 'O';
2992           goto label_escape_sequence;
2993
2994         case ISO_control_sequence_introducer:
2995           /* CSI is handled as an escape sequence of ESC '[' ...  */
2996           c1 = '[';
2997           goto label_escape_sequence;
2998
2999         case ISO_escape:
3000           ONE_MORE_BYTE (c1);
3001         label_escape_sequence:
3002           /* Escape sequences handled here are invocation,
3003              designation, direction specification, and character
3004              composition specification.  */
3005           switch (c1)
3006             {
3007             case '&':           /* revision of following character set */
3008               ONE_MORE_BYTE (c1);
3009               if (!(c1 >= '@' && c1 <= '~'))
3010                 goto invalid_code;
3011               ONE_MORE_BYTE (c1);
3012               if (c1 != ISO_CODE_ESC)
3013                 goto invalid_code;
3014               ONE_MORE_BYTE (c1);
3015               goto label_escape_sequence;
3016
3017             case '$':           /* designation of 2-byte character set */
3018               if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DESIGNATION))
3019                 goto invalid_code;
3020               ONE_MORE_BYTE (c1);
3021               if (c1 >= '@' && c1 <= 'B')
3022                 {       /* designation of JISX0208.1978, GB2312.1980,
3023                            or JISX0208.1980 */
3024                   DECODE_DESIGNATION (0, 2, 0, c1);
3025                 }
3026               else if (c1 >= 0x28 && c1 <= 0x2B)
3027                 {       /* designation of DIMENSION2_CHARS94 character set */
3028                   ONE_MORE_BYTE (c2);
3029                   DECODE_DESIGNATION (c1 - 0x28, 2, 0, c2);
3030                 }
3031               else if (c1 >= 0x2C && c1 <= 0x2F)
3032                 {       /* designation of DIMENSION2_CHARS96 character set */
3033                   ONE_MORE_BYTE (c2);
3034                   DECODE_DESIGNATION (c1 - 0x2C, 2, 1, c2);
3035                 }
3036               else
3037                 goto invalid_code;
3038               /* We must update these variables now.  */
3039               charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
3040               charset_id_1 = CODING_ISO_INVOKED_CHARSET (coding, 1);
3041               continue;
3042
3043             case 'n':           /* invocation of locking-shift-2 */
3044               if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LOCKING_SHIFT)
3045                   || CODING_ISO_DESIGNATION (coding, 2) < 0)
3046                 goto invalid_code;
3047               CODING_ISO_INVOCATION (coding, 0) = 2;
3048               charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
3049               continue;
3050
3051             case 'o':           /* invocation of locking-shift-3 */
3052               if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LOCKING_SHIFT)
3053                   || CODING_ISO_DESIGNATION (coding, 3) < 0)
3054                 goto invalid_code;
3055               CODING_ISO_INVOCATION (coding, 0) = 3;
3056               charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
3057               continue;
3058
3059             case 'N':           /* invocation of single-shift-2 */
3060               if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT)
3061                   || CODING_ISO_DESIGNATION (coding, 2) < 0)
3062                 goto invalid_code;
3063               charset = CHARSET_FROM_ID (CODING_ISO_DESIGNATION (coding, 2));
3064               ONE_MORE_BYTE (c1);
3065               if (c1 < 0x20 || (c1 >= 0x80 && c1 < 0xA0))
3066                 goto invalid_code;
3067               break;
3068
3069             case 'O':           /* invocation of single-shift-3 */
3070               if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT)
3071                   || CODING_ISO_DESIGNATION (coding, 3) < 0)
3072                 goto invalid_code;
3073               charset = CHARSET_FROM_ID (CODING_ISO_DESIGNATION (coding, 3));
3074               ONE_MORE_BYTE (c1);
3075               if (c1 < 0x20 || (c1 >= 0x80 && c1 < 0xA0))
3076                 goto invalid_code;
3077               break;
3078
3079             case '0': case '2': case '3': case '4': /* start composition */
3080               if (! (coding->common_flags & CODING_ANNOTATE_COMPOSITION_MASK))
3081                 goto invalid_code;
3082               DECODE_COMPOSITION_START (c1);
3083               continue;
3084
3085             case '1':           /* end composition */
3086               if (composition_state == COMPOSING_NO)
3087                 goto invalid_code;
3088               DECODE_COMPOSITION_END ();
3089               continue;
3090
3091             case '[':           /* specification of direction */
3092               if (! CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DIRECTION)
3093                 goto invalid_code;
3094               /* For the moment, nested direction is not supported.
3095                  So, `coding->mode & CODING_MODE_DIRECTION' zero means
3096                  left-to-right, and nozero means right-to-left.  */
3097               ONE_MORE_BYTE (c1);
3098               switch (c1)
3099                 {
3100                 case ']':       /* end of the current direction */
3101                   coding->mode &= ~CODING_MODE_DIRECTION;
3102
3103                 case '0':       /* end of the current direction */
3104                 case '1':       /* start of left-to-right direction */
3105                   ONE_MORE_BYTE (c1);
3106                   if (c1 == ']')
3107                     coding->mode &= ~CODING_MODE_DIRECTION;
3108                   else
3109                     goto invalid_code;
3110                   break;
3111
3112                 case '2':       /* start of right-to-left direction */
3113                   ONE_MORE_BYTE (c1);
3114                   if (c1 == ']')
3115                     coding->mode |= CODING_MODE_DIRECTION;
3116                   else
3117                     goto invalid_code;
3118                   break;
3119
3120                 default:
3121                   goto invalid_code;
3122                 }
3123               continue;
3124
3125             case '%':
3126               ONE_MORE_BYTE (c1);
3127               if (c1 == '/')
3128                 {
3129                   /* CTEXT extended segment:
3130                      ESC % / [0-4] M L --ENCODING-NAME-- \002 --BYTES--
3131                      We keep these bytes as is for the moment.
3132                      They may be decoded by post-read-conversion.  */
3133                   int dim, M, L;
3134                   int size;
3135
3136                   ONE_MORE_BYTE (dim);
3137                   ONE_MORE_BYTE (M);
3138                   ONE_MORE_BYTE (L);
3139                   size = ((M - 128) * 128) + (L - 128);
3140                   if (charbuf + 8 + size > charbuf_end)
3141                     goto break_loop;
3142                   *charbuf++ = ISO_CODE_ESC;
3143                   *charbuf++ = '%';
3144                   *charbuf++ = '/';
3145                   *charbuf++ = dim;
3146                   *charbuf++ = BYTE8_TO_CHAR (M);
3147                   *charbuf++ = BYTE8_TO_CHAR (L);
3148                   while (size-- > 0)
3149                     {
3150                       ONE_MORE_BYTE (c1);
3151                       *charbuf++ = ASCII_BYTE_P (c1) ? c1 : BYTE8_TO_CHAR (c1);
3152                     }
3153                 }
3154               else if (c1 == 'G')
3155                 {
3156                   /* XFree86 extension for embedding UTF-8 in CTEXT:
3157                      ESC % G --UTF-8-BYTES-- ESC % @
3158                      We keep these bytes as is for the moment.
3159                      They may be decoded by post-read-conversion.  */
3160                   int *p = charbuf;
3161
3162                   if (p + 6 > charbuf_end)
3163                     goto break_loop;
3164                   *p++ = ISO_CODE_ESC;
3165                   *p++ = '%';
3166                   *p++ = 'G';
3167                   while (p < charbuf_end)
3168                     {
3169                       ONE_MORE_BYTE (c1);
3170                       if (c1 == ISO_CODE_ESC
3171                           && src + 1 < src_end
3172                           && src[0] == '%'
3173                           && src[1] == '@')
3174                         break;
3175                       *p++ = ASCII_BYTE_P (c1) ? c1 : BYTE8_TO_CHAR (c1);
3176                     }
3177                   if (p + 3 > charbuf_end)
3178                     goto break_loop;
3179                   *p++ = ISO_CODE_ESC;
3180                   *p++ = '%';
3181                   *p++ = '@';
3182                   charbuf = p;
3183                 }
3184               else
3185                 goto invalid_code;
3186               continue;
3187               break;
3188
3189             default:
3190               if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DESIGNATION))
3191                 goto invalid_code;
3192               if (c1 >= 0x28 && c1 <= 0x2B)
3193                 {       /* designation of DIMENSION1_CHARS94 character set */
3194                   ONE_MORE_BYTE (c2);
3195                   DECODE_DESIGNATION (c1 - 0x28, 1, 0, c2);
3196                 }
3197               else if (c1 >= 0x2C && c1 <= 0x2F)
3198                 {       /* designation of DIMENSION1_CHARS96 character set */
3199                   ONE_MORE_BYTE (c2);
3200                   DECODE_DESIGNATION (c1 - 0x2C, 1, 1, c2);
3201                 }
3202               else
3203                 goto invalid_code;
3204               /* We must update these variables now.  */
3205               charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
3206               charset_id_1 = CODING_ISO_INVOKED_CHARSET (coding, 1);
3207               continue;
3208             }
3209         }
3210
3211       if (charset->id != charset_ascii
3212           && last_id != charset->id)
3213         {
3214           if (last_id != charset_ascii)
3215             ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
3216           last_id = charset->id;
3217           last_offset = char_offset;
3218         }
3219
3220       /* Now we know CHARSET and 1st position code C1 of a character.
3221          Produce a decoded character while getting 2nd position code
3222          C2 if necessary.  */
3223       c1 &= 0x7F;
3224       if (CHARSET_DIMENSION (charset) > 1)
3225         {
3226           ONE_MORE_BYTE (c2);
3227           if (c2 < 0x20 || (c2 >= 0x80 && c2 < 0xA0))
3228             /* C2 is not in a valid range.  */
3229             goto invalid_code;
3230           c1 = (c1 << 8) | (c2 & 0x7F);
3231           if (CHARSET_DIMENSION (charset) > 2)
3232             {
3233               ONE_MORE_BYTE (c2);
3234               if (c2 < 0x20 || (c2 >= 0x80 && c2 < 0xA0))
3235                 /* C2 is not in a valid range.  */
3236                 goto invalid_code;
3237               c1 = (c1 << 8) | (c2 & 0x7F);
3238             }
3239         }
3240
3241       CODING_DECODE_CHAR (coding, src, src_base, src_end, charset, c1, c);
3242       if (c < 0)
3243         {
3244           MAYBE_FINISH_COMPOSITION ();
3245           for (; src_base < src; src_base++, char_offset++)
3246             {
3247               if (ASCII_BYTE_P (*src_base))
3248                 *charbuf++ = *src_base;
3249               else
3250                 *charbuf++ = BYTE8_TO_CHAR (*src_base);
3251             }
3252         }
3253       else if (composition_state == COMPOSING_NO)
3254         {
3255           *charbuf++ = c;
3256           char_offset++;
3257         }
3258       else
3259         {
3260           components[component_idx++] = c;
3261           if (method == COMPOSITION_WITH_RULE
3262               || (method == COMPOSITION_WITH_RULE_ALTCHARS
3263                   && composition_state == COMPOSING_COMPONENT_CHAR))
3264             composition_state++;
3265         }
3266       continue;
3267
3268     invalid_code:
3269       MAYBE_FINISH_COMPOSITION ();
3270       src = src_base;
3271       consumed_chars = consumed_chars_base;
3272       ONE_MORE_BYTE (c);
3273       *charbuf++ = c < 0 ? -c : ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
3274       char_offset++;
3275       coding->errors++;
3276       continue;
3277
3278     break_loop:
3279       break;
3280     }
3281
3282  no_more_source:
3283   if (last_id != charset_ascii)
3284     ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
3285   coding->consumed_char += consumed_chars_base;
3286   coding->consumed = src_base - coding->source;
3287   coding->charbuf_used = charbuf - coding->charbuf;
3288 }
3289
3290
3291 /* ISO2022 encoding stuff.  */
3292
3293 /*
3294    It is not enough to say just "ISO2022" on encoding, we have to
3295    specify more details.  In Emacs, each coding system of ISO2022
3296    variant has the following specifications:
3297         1. Initial designation to G0 thru G3.
3298         2. Allows short-form designation?
3299         3. ASCII should be designated to G0 before control characters?
3300         4. ASCII should be designated to G0 at end of line?
3301         5. 7-bit environment or 8-bit environment?
3302         6. Use locking-shift?
3303         7. Use Single-shift?
3304    And the following two are only for Japanese:
3305         8. Use ASCII in place of JIS0201-1976-Roman?
3306         9. Use JISX0208-1983 in place of JISX0208-1978?
3307    These specifications are encoded in CODING_ISO_FLAGS (coding) as flag bits
3308    defined by macros CODING_ISO_FLAG_XXX.  See `coding.h' for more
3309    details.
3310 */
3311
3312 /* Produce codes (escape sequence) for designating CHARSET to graphic
3313    register REG at DST, and increment DST.  If <final-char> of CHARSET is
3314    '@', 'A', or 'B' and the coding system CODING allows, produce
3315    designation sequence of short-form.  */
3316
3317 #define ENCODE_DESIGNATION(charset, reg, coding)                        \
3318   do {                                                                  \
3319     unsigned char final_char = CHARSET_ISO_FINAL (charset);             \
3320     char *intermediate_char_94 = "()*+";                                \
3321     char *intermediate_char_96 = ",-./";                                \
3322     int revision = -1;                                                  \
3323     int c;                                                              \
3324                                                                         \
3325     if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_REVISION)           \
3326       revision = CHARSET_ISO_REVISION (charset);                        \
3327                                                                         \
3328     if (revision >= 0)                                                  \
3329       {                                                                 \
3330         EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, '&');                       \
3331         EMIT_ONE_BYTE ('@' + revision);                                 \
3332       }                                                                 \
3333     EMIT_ONE_ASCII_BYTE (ISO_CODE_ESC);                                 \
3334     if (CHARSET_DIMENSION (charset) == 1)                               \
3335       {                                                                 \
3336         if (! CHARSET_ISO_CHARS_96 (charset))                           \
3337           c = intermediate_char_94[reg];                                \
3338         else                                                            \
3339           c = intermediate_char_96[reg];                                \
3340         EMIT_ONE_ASCII_BYTE (c);                                        \
3341       }                                                                 \
3342     else                                                                \
3343       {                                                                 \
3344         EMIT_ONE_ASCII_BYTE ('$');                                      \
3345         if (! CHARSET_ISO_CHARS_96 (charset))                           \
3346           {                                                             \
3347             if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LONG_FORM   \
3348                 || reg != 0                                             \
3349                 || final_char < '@' || final_char > 'B')                \
3350               EMIT_ONE_ASCII_BYTE (intermediate_char_94[reg]);          \
3351           }                                                             \
3352         else                                                            \
3353           EMIT_ONE_ASCII_BYTE (intermediate_char_96[reg]);              \
3354       }                                                                 \
3355     EMIT_ONE_ASCII_BYTE (final_char);                                   \
3356                                                                         \
3357     CODING_ISO_DESIGNATION (coding, reg) = CHARSET_ID (charset);        \
3358   } while (0)
3359
3360
3361 /* The following two macros produce codes (control character or escape
3362    sequence) for ISO2022 single-shift functions (single-shift-2 and
3363    single-shift-3).  */
3364
3365 #define ENCODE_SINGLE_SHIFT_2                                           \
3366   do {                                                                  \
3367     if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS)         \
3368       EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'N');                         \
3369     else                                                                \
3370       EMIT_ONE_BYTE (ISO_CODE_SS2);                                     \
3371     CODING_ISO_SINGLE_SHIFTING (coding) = 1;                            \
3372   } while (0)
3373
3374
3375 #define ENCODE_SINGLE_SHIFT_3                                           \
3376   do {                                                                  \
3377     if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS)         \
3378       EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'O');                         \
3379     else                                                                \
3380       EMIT_ONE_BYTE (ISO_CODE_SS3);                                     \
3381     CODING_ISO_SINGLE_SHIFTING (coding) = 1;                            \
3382   } while (0)
3383
3384
3385 /* The following four macros produce codes (control character or
3386    escape sequence) for ISO2022 locking-shift functions (shift-in,
3387    shift-out, locking-shift-2, and locking-shift-3).  */
3388
3389 #define ENCODE_SHIFT_IN                                 \
3390   do {                                                  \
3391     EMIT_ONE_ASCII_BYTE (ISO_CODE_SI);                  \
3392     CODING_ISO_INVOCATION (coding, 0) = 0;              \
3393   } while (0)
3394
3395
3396 #define ENCODE_SHIFT_OUT                                \
3397   do {                                                  \
3398     EMIT_ONE_ASCII_BYTE (ISO_CODE_SO);                  \
3399     CODING_ISO_INVOCATION (coding, 0) = 1;              \
3400   } while (0)
3401
3402
3403 #define ENCODE_LOCKING_SHIFT_2                          \
3404   do {                                                  \
3405     EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'n');           \
3406     CODING_ISO_INVOCATION (coding, 0) = 2;              \
3407   } while (0)
3408
3409
3410 #define ENCODE_LOCKING_SHIFT_3                          \
3411   do {                                                  \
3412     EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'n');           \
3413     CODING_ISO_INVOCATION (coding, 0) = 3;              \
3414   } while (0)
3415
3416
3417 /* Produce codes for a DIMENSION1 character whose character set is
3418    CHARSET and whose position-code is C1.  Designation and invocation
3419    sequences are also produced in advance if necessary.  */
3420
3421 #define ENCODE_ISO_CHARACTER_DIMENSION1(charset, c1)                    \
3422   do {                                                                  \
3423     int id = CHARSET_ID (charset);                                      \
3424                                                                         \
3425     if ((CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_USE_ROMAN)         \
3426         && id == charset_ascii)                                         \
3427       {                                                                 \
3428         id = charset_jisx0201_roman;                                    \
3429         charset = CHARSET_FROM_ID (id);                                 \
3430       }                                                                 \
3431                                                                         \
3432     if (CODING_ISO_SINGLE_SHIFTING (coding))                            \
3433       {                                                                 \
3434         if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS)     \
3435           EMIT_ONE_ASCII_BYTE (c1 & 0x7F);                              \
3436         else                                                            \
3437           EMIT_ONE_BYTE (c1 | 0x80);                                    \
3438         CODING_ISO_SINGLE_SHIFTING (coding) = 0;                        \
3439         break;                                                          \
3440       }                                                                 \
3441     else if (id == CODING_ISO_INVOKED_CHARSET (coding, 0))              \
3442       {                                                                 \
3443         EMIT_ONE_ASCII_BYTE (c1 & 0x7F);                                \
3444         break;                                                          \
3445       }                                                                 \
3446     else if (id == CODING_ISO_INVOKED_CHARSET (coding, 1))              \
3447       {                                                                 \
3448         EMIT_ONE_BYTE (c1 | 0x80);                                      \
3449         break;                                                          \
3450       }                                                                 \
3451     else                                                                \
3452       /* Since CHARSET is not yet invoked to any graphic planes, we     \
3453          must invoke it, or, at first, designate it to some graphic     \
3454          register.  Then repeat the loop to actually produce the        \
3455          character.  */                                                 \
3456       dst = encode_invocation_designation (charset, coding, dst,        \
3457                                            &produced_chars);            \
3458   } while (1)
3459
3460
3461 /* Produce codes for a DIMENSION2 character whose character set is
3462    CHARSET and whose position-codes are C1 and C2.  Designation and
3463    invocation codes are also produced in advance if necessary.  */
3464
3465 #define ENCODE_ISO_CHARACTER_DIMENSION2(charset, c1, c2)                \
3466   do {                                                                  \
3467     int id = CHARSET_ID (charset);                                      \
3468                                                                         \
3469     if ((CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_USE_OLDJIS)        \
3470         && id == charset_jisx0208)                                      \
3471       {                                                                 \
3472         id = charset_jisx0208_1978;                                     \
3473         charset = CHARSET_FROM_ID (id);                                 \
3474       }                                                                 \
3475                                                                         \
3476     if (CODING_ISO_SINGLE_SHIFTING (coding))                            \
3477       {                                                                 \
3478         if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS)     \
3479           EMIT_TWO_ASCII_BYTES ((c1) & 0x7F, (c2) & 0x7F);              \
3480         else                                                            \
3481           EMIT_TWO_BYTES ((c1) | 0x80, (c2) | 0x80);                    \
3482         CODING_ISO_SINGLE_SHIFTING (coding) = 0;                        \
3483         break;                                                          \
3484       }                                                                 \
3485     else if (id == CODING_ISO_INVOKED_CHARSET (coding, 0))              \
3486       {                                                                 \
3487         EMIT_TWO_ASCII_BYTES ((c1) & 0x7F, (c2) & 0x7F);                \
3488         break;                                                          \
3489       }                                                                 \
3490     else if (id == CODING_ISO_INVOKED_CHARSET (coding, 1))              \
3491       {                                                                 \
3492         EMIT_TWO_BYTES ((c1) | 0x80, (c2) | 0x80);                      \
3493         break;                                                          \
3494       }                                                                 \
3495     else                                                                \
3496       /* Since CHARSET is not yet invoked to any graphic planes, we     \
3497          must invoke it, or, at first, designate it to some graphic     \
3498          register.  Then repeat the loop to actually produce the        \
3499          character.  */                                                 \
3500       dst = encode_invocation_designation (charset, coding, dst,        \
3501                                            &produced_chars);            \
3502   } while (1)
3503
3504
3505 #define ENCODE_ISO_CHARACTER(charset, c)                                   \
3506   do {                                                                     \
3507     int code = ENCODE_CHAR ((charset),(c));                                \
3508                                                                            \
3509     if (CHARSET_DIMENSION (charset) == 1)                                  \
3510       ENCODE_ISO_CHARACTER_DIMENSION1 ((charset), code);                   \
3511     else                                                                   \
3512       ENCODE_ISO_CHARACTER_DIMENSION2 ((charset), code >> 8, code & 0xFF); \
3513   } while (0)
3514
3515
3516 /* Produce designation and invocation codes at a place pointed by DST
3517    to use CHARSET.  The element `spec.iso_2022' of *CODING is updated.
3518    Return new DST.  */
3519
3520 unsigned char *
3521 encode_invocation_designation (charset, coding, dst, p_nchars)
3522      struct charset *charset;
3523      struct coding_system *coding;
3524      unsigned char *dst;
3525      int *p_nchars;
3526 {
3527   int multibytep = coding->dst_multibyte;
3528   int produced_chars = *p_nchars;
3529   int reg;                      /* graphic register number */
3530   int id = CHARSET_ID (charset);
3531
3532   /* At first, check designations.  */
3533   for (reg = 0; reg < 4; reg++)
3534     if (id == CODING_ISO_DESIGNATION (coding, reg))
3535       break;
3536
3537   if (reg >= 4)
3538     {
3539       /* CHARSET is not yet designated to any graphic registers.  */
3540       /* At first check the requested designation.  */
3541       reg = CODING_ISO_REQUEST (coding, id);
3542       if (reg < 0)
3543         /* Since CHARSET requests no special designation, designate it
3544            to graphic register 0.  */
3545         reg = 0;
3546
3547       ENCODE_DESIGNATION (charset, reg, coding);
3548     }
3549
3550   if (CODING_ISO_INVOCATION (coding, 0) != reg
3551       && CODING_ISO_INVOCATION (coding, 1) != reg)
3552     {
3553       /* Since the graphic register REG is not invoked to any graphic
3554          planes, invoke it to graphic plane 0.  */
3555       switch (reg)
3556         {
3557         case 0:                 /* graphic register 0 */
3558           ENCODE_SHIFT_IN;
3559           break;
3560
3561         case 1:                 /* graphic register 1 */
3562           ENCODE_SHIFT_OUT;
3563           break;
3564
3565         case 2:                 /* graphic register 2 */
3566           if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT)
3567             ENCODE_SINGLE_SHIFT_2;
3568           else
3569             ENCODE_LOCKING_SHIFT_2;
3570           break;
3571
3572         case 3:                 /* graphic register 3 */
3573           if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT)
3574             ENCODE_SINGLE_SHIFT_3;
3575           else
3576             ENCODE_LOCKING_SHIFT_3;
3577           break;
3578         }
3579     }
3580
3581   *p_nchars = produced_chars;
3582   return dst;
3583 }
3584
3585 /* The following three macros produce codes for indicating direction
3586    of text.  */
3587 #define ENCODE_CONTROL_SEQUENCE_INTRODUCER                              \
3588   do {                                                                  \
3589     if (CODING_ISO_FLAGS (coding) == CODING_ISO_FLAG_SEVEN_BITS)        \
3590       EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, '[');                         \
3591     else                                                                \
3592       EMIT_ONE_BYTE (ISO_CODE_CSI);                                     \
3593   } while (0)
3594
3595
3596 #define ENCODE_DIRECTION_R2L()                  \
3597   do {                                          \
3598     ENCODE_CONTROL_SEQUENCE_INTRODUCER (dst);   \
3599     EMIT_TWO_ASCII_BYTES ('2', ']');            \
3600   } while (0)
3601
3602
3603 #define ENCODE_DIRECTION_L2R()                  \
3604   do {                                          \
3605     ENCODE_CONTROL_SEQUENCE_INTRODUCER (dst);   \
3606     EMIT_TWO_ASCII_BYTES ('0', ']');            \
3607   } while (0)
3608
3609
3610 /* Produce codes for designation and invocation to reset the graphic
3611    planes and registers to initial state.  */
3612 #define ENCODE_RESET_PLANE_AND_REGISTER()                               \
3613   do {                                                                  \
3614     int reg;                                                            \
3615     struct charset *charset;                                            \
3616                                                                         \
3617     if (CODING_ISO_INVOCATION (coding, 0) != 0)                         \
3618       ENCODE_SHIFT_IN;                                                  \
3619     for (reg = 0; reg < 4; reg++)                                       \
3620       if (CODING_ISO_INITIAL (coding, reg) >= 0                         \
3621           && (CODING_ISO_DESIGNATION (coding, reg)                      \
3622               != CODING_ISO_INITIAL (coding, reg)))                     \
3623         {                                                               \
3624           charset = CHARSET_FROM_ID (CODING_ISO_INITIAL (coding, reg)); \
3625           ENCODE_DESIGNATION (charset, reg, coding);                    \
3626         }                                                               \
3627   } while (0)
3628
3629
3630 /* Produce designation sequences of charsets in the line started from
3631    SRC to a place pointed by DST, and return updated DST.
3632
3633    If the current block ends before any end-of-line, we may fail to
3634    find all the necessary designations.  */
3635
3636 static unsigned char *
3637 encode_designation_at_bol (coding, charbuf, charbuf_end, dst)
3638      struct coding_system *coding;
3639      int *charbuf, *charbuf_end;
3640      unsigned char *dst;
3641 {
3642   struct charset *charset;
3643   /* Table of charsets to be designated to each graphic register.  */
3644   int r[4];
3645   int c, found = 0, reg;
3646   int produced_chars = 0;
3647   int multibytep = coding->dst_multibyte;
3648   Lisp_Object attrs;
3649   Lisp_Object charset_list;
3650
3651   attrs = CODING_ID_ATTRS (coding->id);
3652   charset_list = CODING_ATTR_CHARSET_LIST (attrs);
3653   if (EQ (charset_list, Qiso_2022))
3654     charset_list = Viso_2022_charset_list;
3655
3656   for (reg = 0; reg < 4; reg++)
3657     r[reg] = -1;
3658
3659   while (found < 4)
3660     {
3661       int id;
3662
3663       c = *charbuf++;
3664       if (c == '\n')
3665         break;
3666       charset = char_charset (c, charset_list, NULL);
3667       id = CHARSET_ID (charset);
3668       reg = CODING_ISO_REQUEST (coding, id);
3669       if (reg >= 0 && r[reg] < 0)
3670         {
3671           found++;
3672           r[reg] = id;
3673         }
3674     }
3675
3676   if (found)
3677     {
3678       for (reg = 0; reg < 4; reg++)
3679         if (r[reg] >= 0
3680             && CODING_ISO_DESIGNATION (coding, reg) != r[reg])
3681           ENCODE_DESIGNATION (CHARSET_FROM_ID (r[reg]), reg, coding);
3682     }
3683
3684   return dst;
3685 }
3686
3687 /* See the above "GENERAL NOTES on `encode_coding_XXX ()' functions".  */
3688
3689 static int
3690 encode_coding_iso_2022 (coding)
3691      struct coding_system *coding;
3692 {
3693   int multibytep = coding->dst_multibyte;
3694   int *charbuf = coding->charbuf;
3695   int *charbuf_end = charbuf + coding->charbuf_used;
3696   unsigned char *dst = coding->destination + coding->produced;
3697   unsigned char *dst_end = coding->destination + coding->dst_bytes;
3698   int safe_room = 16;
3699   int bol_designation
3700     = (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DESIGNATE_AT_BOL
3701        && CODING_ISO_BOL (coding));
3702   int produced_chars = 0;
3703   Lisp_Object attrs, eol_type, charset_list;
3704   int ascii_compatible;
3705   int c;
3706   int preferred_charset_id = -1;
3707
3708   CODING_GET_INFO (coding, attrs, charset_list);
3709   eol_type = CODING_ID_EOL_TYPE (coding->id);
3710   if (VECTORP (eol_type))
3711     eol_type = Qunix;
3712
3713   setup_iso_safe_charsets (attrs);
3714   /* Charset list may have been changed.  */
3715   charset_list = CODING_ATTR_CHARSET_LIST (attrs);              \
3716   coding->safe_charsets = (char *) SDATA (CODING_ATTR_SAFE_CHARSETS(attrs));
3717
3718   ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
3719
3720   while (charbuf < charbuf_end)
3721     {
3722       ASSURE_DESTINATION (safe_room);
3723
3724       if (bol_designation)
3725         {
3726           unsigned char *dst_prev = dst;
3727
3728           /* We have to produce designation sequences if any now.  */
3729           dst = encode_designation_at_bol (coding, charbuf, charbuf_end, dst);
3730           bol_designation = 0;
3731           /* We are sure that designation sequences are all ASCII bytes.  */
3732           produced_chars += dst - dst_prev;
3733         }
3734
3735       c = *charbuf++;
3736
3737       if (c < 0)
3738         {
3739           /* Handle an annotation.  */
3740           switch (*charbuf)
3741             {
3742             case CODING_ANNOTATE_COMPOSITION_MASK:
3743               /* Not yet implemented.  */
3744               break;
3745             case CODING_ANNOTATE_CHARSET_MASK:
3746               preferred_charset_id = charbuf[3];
3747               if (preferred_charset_id >= 0
3748                   && NILP (Fmemq (make_number (preferred_charset_id),
3749                                   charset_list)))
3750                 preferred_charset_id = -1;
3751               break;
3752             default:
3753               abort ();
3754             }
3755           charbuf += -c - 1;
3756           continue;
3757         }
3758
3759       /* Now encode the character C.  */
3760       if (c < 0x20 || c == 0x7F)
3761         {
3762           if (c == '\n'
3763               || (c == '\r' && EQ (eol_type, Qmac)))
3764             {
3765               if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_RESET_AT_EOL)
3766                 ENCODE_RESET_PLANE_AND_REGISTER ();
3767               if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_INIT_AT_BOL)
3768                 {
3769                   int i;
3770
3771                   for (i = 0; i < 4; i++)
3772                     CODING_ISO_DESIGNATION (coding, i)
3773                       = CODING_ISO_INITIAL (coding, i);
3774                 }
3775               bol_designation
3776                 = CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DESIGNATE_AT_BOL;
3777             }
3778           else if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_RESET_AT_CNTL)
3779             ENCODE_RESET_PLANE_AND_REGISTER ();
3780           EMIT_ONE_ASCII_BYTE (c);
3781         }
3782       else if (ASCII_CHAR_P (c))
3783         {
3784           if (ascii_compatible)
3785             EMIT_ONE_ASCII_BYTE (c);
3786           else
3787             {
3788               struct charset *charset = CHARSET_FROM_ID (charset_ascii);
3789               ENCODE_ISO_CHARACTER (charset, c);
3790             }
3791         }
3792       else if (CHAR_BYTE8_P (c))
3793         {
3794           c = CHAR_TO_BYTE8 (c);
3795           EMIT_ONE_BYTE (c);
3796         }
3797       else
3798         {
3799           struct charset *charset;
3800
3801           if (preferred_charset_id >= 0)
3802             {
3803               charset = CHARSET_FROM_ID (preferred_charset_id);
3804               if (! CHAR_CHARSET_P (c, charset))
3805                 charset = char_charset (c, charset_list, NULL);
3806             }
3807           else
3808             charset = char_charset (c, charset_list, NULL);
3809           if (!charset)
3810             {
3811               if (coding->mode & CODING_MODE_SAFE_ENCODING)
3812                 {
3813                   c = CODING_INHIBIT_CHARACTER_SUBSTITUTION;
3814                   charset = CHARSET_FROM_ID (charset_ascii);
3815                 }
3816               else
3817                 {
3818                   c = coding->default_char;
3819                   charset = char_charset (c, charset_list, NULL);
3820                 }
3821             }
3822           ENCODE_ISO_CHARACTER (charset, c);
3823         }
3824     }
3825
3826   if (coding->mode & CODING_MODE_LAST_BLOCK
3827       && CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_RESET_AT_EOL)
3828     {
3829       ASSURE_DESTINATION (safe_room);
3830       ENCODE_RESET_PLANE_AND_REGISTER ();
3831     }
3832   record_conversion_result (coding, CODING_RESULT_SUCCESS);
3833   CODING_ISO_BOL (coding) = bol_designation;
3834   coding->produced_char += produced_chars;
3835   coding->produced = dst - coding->destination;
3836   return 0;
3837 }
3838
3839 \f
3840 /*** 8,9. SJIS and BIG5 handlers ***/
3841
3842 /* Although SJIS and BIG5 are not ISO's coding system, they are used
3843    quite widely.  So, for the moment, Emacs supports them in the bare
3844    C code.  But, in the future, they may be supported only by CCL.  */
3845
3846 /* SJIS is a coding system encoding three character sets: ASCII, right
3847    half of JISX0201-Kana, and JISX0208.  An ASCII character is encoded
3848    as is.  A character of charset katakana-jisx0201 is encoded by
3849    "position-code + 0x80".  A character of charset japanese-jisx0208
3850    is encoded in 2-byte but two position-codes are divided and shifted
3851    so that it fit in the range below.
3852
3853    --- CODE RANGE of SJIS ---
3854    (character set)      (range)
3855    ASCII                0x00 .. 0x7F
3856    KATAKANA-JISX0201    0xA0 .. 0xDF
3857    JISX0208 (1st byte)  0x81 .. 0x9F and 0xE0 .. 0xEF
3858             (2nd byte)  0x40 .. 0x7E and 0x80 .. 0xFC
3859    -------------------------------
3860
3861 */
3862
3863 /* BIG5 is a coding system encoding two character sets: ASCII and
3864    Big5.  An ASCII character is encoded as is.  Big5 is a two-byte
3865    character set and is encoded in two-byte.
3866
3867    --- CODE RANGE of BIG5 ---
3868    (character set)      (range)
3869    ASCII                0x00 .. 0x7F
3870    Big5 (1st byte)      0xA1 .. 0xFE
3871         (2nd byte)      0x40 .. 0x7E and 0xA1 .. 0xFE
3872    --------------------------
3873
3874   */
3875
3876 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
3877    Check if a text is encoded in SJIS.  If it is, return
3878    CATEGORY_MASK_SJIS, else return 0.  */
3879
3880 static int
3881 detect_coding_sjis (coding, detect_info)
3882      struct coding_system *coding;
3883      struct coding_detection_info *detect_info;
3884 {
3885   const unsigned char *src = coding->source, *src_base;
3886   const unsigned char *src_end = coding->source + coding->src_bytes;
3887   int multibytep = coding->src_multibyte;
3888   int consumed_chars = 0;
3889   int found = 0;
3890   int c;
3891
3892   detect_info->checked |= CATEGORY_MASK_SJIS;
3893   /* A coding system of this category is always ASCII compatible.  */
3894   src += coding->head_ascii;
3895
3896   while (1)
3897     {
3898       src_base = src;
3899       ONE_MORE_BYTE (c);
3900       if (c < 0x80)
3901         continue;
3902       if ((c >= 0x81 && c <= 0x9F) || (c >= 0xE0 && c <= 0xEF))
3903         {
3904           ONE_MORE_BYTE (c);
3905           if (c < 0x40 || c == 0x7F || c > 0xFC)
3906             break;
3907           found = CATEGORY_MASK_SJIS;
3908         }
3909       else if (c >= 0xA0 && c < 0xE0)
3910         found = CATEGORY_MASK_SJIS;
3911       else
3912         break;
3913     }
3914   detect_info->rejected |= CATEGORY_MASK_SJIS;
3915   return 0;
3916
3917  no_more_source:
3918   if (src_base < src && coding->mode & CODING_MODE_LAST_BLOCK)
3919     {
3920       detect_info->rejected |= CATEGORY_MASK_SJIS;
3921       return 0;
3922     }
3923   detect_info->found |= found;
3924   return 1;
3925 }
3926
3927 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
3928    Check if a text is encoded in BIG5.  If it is, return
3929    CATEGORY_MASK_BIG5, else return 0.  */
3930
3931 static int
3932 detect_coding_big5 (coding, detect_info)
3933      struct coding_system *coding;
3934      struct coding_detection_info *detect_info;
3935 {
3936   const unsigned char *src = coding->source, *src_base;
3937   const unsigned char *src_end = coding->source + coding->src_bytes;
3938   int multibytep = coding->src_multibyte;
3939   int consumed_chars = 0;
3940   int found = 0;
3941   int c;
3942
3943   detect_info->checked |= CATEGORY_MASK_BIG5;
3944   /* A coding system of this category is always ASCII compatible.  */
3945   src += coding->head_ascii;
3946
3947   while (1)
3948     {
3949       src_base = src;
3950       ONE_MORE_BYTE (c);
3951       if (c < 0x80)
3952         continue;
3953       if (c >= 0xA1)
3954         {
3955           ONE_MORE_BYTE (c);
3956           if (c < 0x40 || (c >= 0x7F && c <= 0xA0))
3957             return 0;
3958           found = CATEGORY_MASK_BIG5;
3959         }
3960       else
3961         break;
3962     }
3963   detect_info->rejected |= CATEGORY_MASK_BIG5;
3964   return 0;
3965
3966  no_more_source:
3967   if (src_base < src && coding->mode & CODING_MODE_LAST_BLOCK)
3968     {
3969       detect_info->rejected |= CATEGORY_MASK_BIG5;
3970       return 0;
3971     }
3972   detect_info->found |= found;
3973   return 1;
3974 }
3975
3976 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".
3977    If SJIS_P is 1, decode SJIS text, else decode BIG5 test.  */
3978
3979 static void
3980 decode_coding_sjis (coding)
3981      struct coding_system *coding;
3982 {
3983   const unsigned char *src = coding->source + coding->consumed;
3984   const unsigned char *src_end = coding->source + coding->src_bytes;
3985   const unsigned char *src_base;
3986   int *charbuf = coding->charbuf + coding->charbuf_used;
3987   int *charbuf_end
3988     = coding->charbuf + coding->charbuf_size - MAX_ANNOTATION_LENGTH;
3989   int consumed_chars = 0, consumed_chars_base;
3990   int multibytep = coding->src_multibyte;
3991   struct charset *charset_roman, *charset_kanji, *charset_kana;
3992   struct charset *charset_kanji2;
3993   Lisp_Object attrs, charset_list, val;
3994   int char_offset = coding->produced_char;
3995   int last_offset = char_offset;
3996   int last_id = charset_ascii;
3997
3998   CODING_GET_INFO (coding, attrs, charset_list);
3999
4000   val = charset_list;
4001   charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
4002   charset_kana = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
4003   charset_kanji = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
4004   charset_kanji2 = NILP (val) ? NULL : CHARSET_FROM_ID (XINT (XCAR (val)));
4005
4006   while (1)
4007     {
4008       int c, c1;
4009       struct charset *charset;
4010
4011       src_base = src;
4012       consumed_chars_base = consumed_chars;
4013
4014       if (charbuf >= charbuf_end)
4015         break;
4016
4017       ONE_MORE_BYTE (c);
4018       if (c < 0)
4019         goto invalid_code;
4020       if (c < 0x80)
4021         charset = charset_roman;
4022       else if (c == 0x80 || c == 0xA0)
4023         goto invalid_code;
4024       else if (c >= 0xA1 && c <= 0xDF)
4025         {
4026           /* SJIS -> JISX0201-Kana */
4027           c &= 0x7F;
4028           charset = charset_kana;
4029         }
4030       else if (c <= 0xEF)
4031         {
4032           /* SJIS -> JISX0208 */
4033           ONE_MORE_BYTE (c1);
4034           if (c1 < 0x40 || c1 == 0x7F || c1 > 0xFC)
4035             goto invalid_code;
4036           c = (c << 8) | c1;
4037           SJIS_TO_JIS (c);
4038           charset = charset_kanji;
4039         }
4040       else if (c <= 0xFC && charset_kanji2)
4041         {
4042           /* SJIS -> JISX0213-2 */
4043           ONE_MORE_BYTE (c1);
4044           if (c1 < 0x40 || c1 == 0x7F || c1 > 0xFC)
4045             goto invalid_code;
4046           c = (c << 8) | c1;
4047           SJIS_TO_JIS2 (c);
4048           charset = charset_kanji2;
4049         }
4050       else
4051         goto invalid_code;
4052       if (charset->id != charset_ascii
4053           && last_id != charset->id)
4054         {
4055           if (last_id != charset_ascii)
4056             ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
4057           last_id = charset->id;
4058           last_offset = char_offset;
4059         }
4060       CODING_DECODE_CHAR (coding, src, src_base, src_end, charset, c, c);
4061       *charbuf++ = c;
4062       char_offset++;
4063       continue;
4064
4065     invalid_code:
4066       src = src_base;
4067       consumed_chars = consumed_chars_base;
4068       ONE_MORE_BYTE (c);
4069       *charbuf++ = c < 0 ? -c : BYTE8_TO_CHAR (c);
4070       char_offset++;
4071       coding->errors++;
4072     }
4073
4074  no_more_source:
4075   if (last_id != charset_ascii)
4076     ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
4077   coding->consumed_char += consumed_chars_base;
4078   coding->consumed = src_base - coding->source;
4079   coding->charbuf_used = charbuf - coding->charbuf;
4080 }
4081
4082 static void
4083 decode_coding_big5 (coding)
4084      struct coding_system *coding;
4085 {
4086   const unsigned char *src = coding->source + coding->consumed;
4087   const unsigned char *src_end = coding->source + coding->src_bytes;
4088   const unsigned char *src_base;
4089   int *charbuf = coding->charbuf + coding->charbuf_used;
4090   int *charbuf_end
4091     = coding->charbuf + coding->charbuf_size - MAX_ANNOTATION_LENGTH;
4092   int consumed_chars = 0, consumed_chars_base;
4093   int multibytep = coding->src_multibyte;
4094   struct charset *charset_roman, *charset_big5;
4095   Lisp_Object attrs, charset_list, val;
4096   int char_offset = coding->produced_char;
4097   int last_offset = char_offset;
4098   int last_id = charset_ascii;
4099
4100   CODING_GET_INFO (coding, attrs, charset_list);
4101   val = charset_list;
4102   charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
4103   charset_big5 = CHARSET_FROM_ID (XINT (XCAR (val)));
4104
4105   while (1)
4106     {
4107       int c, c1;
4108       struct charset *charset;
4109
4110       src_base = src;
4111       consumed_chars_base = consumed_chars;
4112
4113       if (charbuf >= charbuf_end)
4114         break;
4115
4116       ONE_MORE_BYTE (c);
4117
4118       if (c < 0)
4119         goto invalid_code;
4120       if (c < 0x80)
4121         charset = charset_roman;
4122       else
4123         {
4124           /* BIG5 -> Big5 */
4125           if (c < 0xA1 || c > 0xFE)
4126             goto invalid_code;
4127           ONE_MORE_BYTE (c1);
4128           if (c1 < 0x40 || (c1 > 0x7E && c1 < 0xA1) || c1 > 0xFE)
4129             goto invalid_code;
4130           c = c << 8 | c1;
4131           charset = charset_big5;
4132         }
4133       if (charset->id != charset_ascii
4134           && last_id != charset->id)
4135         {
4136           if (last_id != charset_ascii)
4137             ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
4138           last_id = charset->id;
4139           last_offset = char_offset;
4140         }
4141       CODING_DECODE_CHAR (coding, src, src_base, src_end, charset, c, c);
4142       *charbuf++ = c;
4143       char_offset++;
4144       continue;
4145
4146     invalid_code:
4147       src = src_base;
4148       consumed_chars = consumed_chars_base;
4149       ONE_MORE_BYTE (c);
4150       *charbuf++ = c < 0 ? -c : BYTE8_TO_CHAR (c);
4151       char_offset++;
4152       coding->errors++;
4153     }
4154
4155  no_more_source:
4156   if (last_id != charset_ascii)
4157     ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
4158   coding->consumed_char += consumed_chars_base;
4159   coding->consumed = src_base - coding->source;
4160   coding->charbuf_used = charbuf - coding->charbuf;
4161 }
4162
4163 /* See the above "GENERAL NOTES on `encode_coding_XXX ()' functions".
4164    This function can encode charsets `ascii', `katakana-jisx0201',
4165    `japanese-jisx0208', `chinese-big5-1', and `chinese-big5-2'.  We
4166    are sure that all these charsets are registered as official charset
4167    (i.e. do not have extended leading-codes).  Characters of other
4168    charsets are produced without any encoding.  If SJIS_P is 1, encode
4169    SJIS text, else encode BIG5 text.  */
4170
4171 static int
4172 encode_coding_sjis (coding)
4173      struct coding_system *coding;
4174 {
4175   int multibytep = coding->dst_multibyte;
4176   int *charbuf = coding->charbuf;
4177   int *charbuf_end = charbuf + coding->charbuf_used;
4178   unsigned char *dst = coding->destination + coding->produced;
4179   unsigned char *dst_end = coding->destination + coding->dst_bytes;
4180   int safe_room = 4;
4181   int produced_chars = 0;
4182   Lisp_Object attrs, charset_list, val;
4183   int ascii_compatible;
4184   struct charset *charset_roman, *charset_kanji, *charset_kana;
4185   struct charset *charset_kanji2;
4186   int c;
4187
4188   CODING_GET_INFO (coding, attrs, charset_list);
4189   val = charset_list;
4190   charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
4191   charset_kana = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
4192   charset_kanji = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
4193   charset_kanji2 = NILP (val) ? NULL : CHARSET_FROM_ID (XINT (XCAR (val)));
4194
4195   ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
4196
4197   while (charbuf < charbuf_end)
4198     {
4199       ASSURE_DESTINATION (safe_room);
4200       c = *charbuf++;
4201       /* Now encode the character C.  */
4202       if (ASCII_CHAR_P (c) && ascii_compatible)
4203         EMIT_ONE_ASCII_BYTE (c);
4204       else if (CHAR_BYTE8_P (c))
4205         {
4206           c = CHAR_TO_BYTE8 (c);
4207           EMIT_ONE_BYTE (c);
4208         }
4209       else
4210         {
4211           unsigned code;
4212           struct charset *charset = char_charset (c, charset_list, &code);
4213
4214           if (!charset)
4215             {
4216               if (coding->mode & CODING_MODE_SAFE_ENCODING)
4217                 {
4218                   code = CODING_INHIBIT_CHARACTER_SUBSTITUTION;
4219                   charset = CHARSET_FROM_ID (charset_ascii);
4220                 }
4221               else
4222                 {
4223                   c = coding->default_char;
4224                   charset = char_charset (c, charset_list, &code);
4225                 }
4226             }
4227           if (code == CHARSET_INVALID_CODE (charset))
4228             abort ();
4229           if (charset == charset_kanji)
4230             {
4231               int c1, c2;
4232               JIS_TO_SJIS (code);
4233               c1 = code >> 8, c2 = code & 0xFF;
4234               EMIT_TWO_BYTES (c1, c2);
4235             }
4236           else if (charset == charset_kana)
4237             EMIT_ONE_BYTE (code | 0x80);
4238           else if (charset_kanji2 && charset == charset_kanji2)
4239             {
4240               int c1, c2;
4241
4242               c1 = code >> 8;
4243               if (c1 == 0x21 || (c1 >= 0x23 && c1 < 0x25)
4244                   || (c1 >= 0x2C && c1 <= 0x2F) || c1 >= 0x6E)
4245                 {
4246                   JIS_TO_SJIS2 (code);
4247                   c1 = code >> 8, c2 = code & 0xFF;
4248                   EMIT_TWO_BYTES (c1, c2);
4249                 }
4250               else
4251                 EMIT_ONE_ASCII_BYTE (code & 0x7F);
4252             }
4253           else
4254             EMIT_ONE_ASCII_BYTE (code & 0x7F);
4255         }
4256     }
4257   record_conversion_result (coding, CODING_RESULT_SUCCESS);
4258   coding->produced_char += produced_chars;
4259   coding->produced = dst - coding->destination;
4260   return 0;
4261 }
4262
4263 static int
4264 encode_coding_big5 (coding)
4265      struct coding_system *coding;
4266 {
4267   int multibytep = coding->dst_multibyte;
4268   int *charbuf = coding->charbuf;
4269   int *charbuf_end = charbuf + coding->charbuf_used;
4270   unsigned char *dst = coding->destination + coding->produced;
4271   unsigned char *dst_end = coding->destination + coding->dst_bytes;
4272   int safe_room = 4;
4273   int produced_chars = 0;
4274   Lisp_Object attrs, charset_list, val;
4275   int ascii_compatible;
4276   struct charset *charset_roman, *charset_big5;
4277   int c;
4278
4279   CODING_GET_INFO (coding, attrs, charset_list);
4280   val = charset_list;
4281   charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
4282   charset_big5 = CHARSET_FROM_ID (XINT (XCAR (val)));
4283   ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
4284
4285   while (charbuf < charbuf_end)
4286     {
4287       ASSURE_DESTINATION (safe_room);
4288       c = *charbuf++;
4289       /* Now encode the character C.  */
4290       if (ASCII_CHAR_P (c) && ascii_compatible)
4291         EMIT_ONE_ASCII_BYTE (c);
4292       else if (CHAR_BYTE8_P (c))
4293         {
4294           c = CHAR_TO_BYTE8 (c);
4295           EMIT_ONE_BYTE (c);
4296         }
4297       else
4298         {
4299           unsigned code;
4300           struct charset *charset = char_charset (c, charset_list, &code);
4301
4302           if (! charset)
4303             {
4304               if (coding->mode & CODING_MODE_SAFE_ENCODING)
4305                 {
4306                   code = CODING_INHIBIT_CHARACTER_SUBSTITUTION;
4307                   charset = CHARSET_FROM_ID (charset_ascii);
4308                 }
4309               else
4310                 {
4311                   c = coding->default_char;
4312                   charset = char_charset (c, charset_list, &code);
4313                 }
4314             }
4315           if (code == CHARSET_INVALID_CODE (charset))
4316             abort ();
4317           if (charset == charset_big5)
4318             {
4319               int c1, c2;
4320
4321               c1 = code >> 8, c2 = code & 0xFF;
4322               EMIT_TWO_BYTES (c1, c2);
4323             }
4324           else
4325             EMIT_ONE_ASCII_BYTE (code & 0x7F);
4326         }
4327     }
4328   record_conversion_result (coding, CODING_RESULT_SUCCESS);
4329   coding->produced_char += produced_chars;
4330   coding->produced = dst - coding->destination;
4331   return 0;
4332 }
4333
4334 \f
4335 /*** 10. CCL handlers ***/
4336
4337 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
4338    Check if a text is encoded in a coding system of which
4339    encoder/decoder are written in CCL program.  If it is, return
4340    CATEGORY_MASK_CCL, else return 0.  */
4341
4342 static int
4343 detect_coding_ccl (coding, detect_info)
4344      struct coding_system *coding;
4345      struct coding_detection_info *detect_info;
4346 {
4347   const unsigned char *src = coding->source, *src_base;
4348   const unsigned char *src_end = coding->source + coding->src_bytes;
4349   int multibytep = coding->src_multibyte;
4350   int consumed_chars = 0;
4351   int found = 0;
4352   unsigned char *valids;
4353   int head_ascii = coding->head_ascii;
4354   Lisp_Object attrs;
4355
4356   detect_info->checked |= CATEGORY_MASK_CCL;
4357
4358   coding = &coding_categories[coding_category_ccl];
4359   valids = CODING_CCL_VALIDS (coding);
4360   attrs = CODING_ID_ATTRS (coding->id);
4361   if (! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
4362     src += head_ascii;
4363
4364   while (1)
4365     {
4366       int c;
4367
4368       src_base = src;
4369       ONE_MORE_BYTE (c);
4370       if (c < 0 || ! valids[c])
4371         break;
4372       if ((valids[c] > 1))
4373         found = CATEGORY_MASK_CCL;
4374     }
4375   detect_info->rejected |= CATEGORY_MASK_CCL;
4376   return 0;
4377
4378  no_more_source:
4379   detect_info->found |= found;
4380   return 1;
4381 }
4382
4383 static void
4384 decode_coding_ccl (coding)
4385      struct coding_system *coding;
4386 {
4387   const unsigned char *src = coding->source + coding->consumed;
4388   const unsigned char *src_end = coding->source + coding->src_bytes;
4389   int *charbuf = coding->charbuf + coding->charbuf_used;
4390   int *charbuf_end = coding->charbuf + coding->charbuf_size;
4391   int consumed_chars = 0;
4392   int multibytep = coding->src_multibyte;
4393   struct ccl_program ccl;
4394   int source_charbuf[1024];
4395   int source_byteidx[1024];
4396   Lisp_Object attrs, charset_list;
4397
4398   CODING_GET_INFO (coding, attrs, charset_list);
4399   setup_ccl_program (&ccl, CODING_CCL_DECODER (coding));
4400
4401   while (src < src_end)
4402     {
4403       const unsigned char *p = src;
4404       int *source, *source_end;
4405       int i = 0;
4406
4407       if (multibytep)
4408         while (i < 1024 && p < src_end)
4409           {
4410             source_byteidx[i] = p - src;
4411             source_charbuf[i++] = STRING_CHAR_ADVANCE (p);
4412           }
4413       else
4414         while (i < 1024 && p < src_end)
4415           source_charbuf[i++] = *p++;
4416
4417       if (p == src_end && coding->mode & CODING_MODE_LAST_BLOCK)
4418         ccl.last_block = 1;
4419
4420       source = source_charbuf;
4421       source_end = source + i;
4422       while (source < source_end)
4423         {
4424           ccl_driver (&ccl, source, charbuf,
4425                       source_end - source, charbuf_end - charbuf,
4426                       charset_list);
4427           source += ccl.consumed;
4428           charbuf += ccl.produced;
4429           if (ccl.status != CCL_STAT_SUSPEND_BY_DST)
4430             break;
4431         }
4432       if (source < source_end)
4433         src += source_byteidx[source - source_charbuf];
4434       else
4435         src = p;
4436       consumed_chars += source - source_charbuf;
4437
4438       if (ccl.status != CCL_STAT_SUSPEND_BY_SRC
4439           && ccl.status != CODING_RESULT_INSUFFICIENT_SRC)
4440         break;
4441     }
4442
4443   switch (ccl.status)
4444     {
4445     case CCL_STAT_SUSPEND_BY_SRC:
4446       record_conversion_result (coding, CODING_RESULT_INSUFFICIENT_SRC);
4447       break;
4448     case CCL_STAT_SUSPEND_BY_DST:
4449       break;
4450     case CCL_STAT_QUIT:
4451     case CCL_STAT_INVALID_CMD:
4452       record_conversion_result (coding, CODING_RESULT_INTERRUPT);
4453       break;
4454     default:
4455       record_conversion_result (coding, CODING_RESULT_SUCCESS);
4456       break;
4457     }
4458   coding->consumed_char += consumed_chars;
4459   coding->consumed = src - coding->source;
4460   coding->charbuf_used = charbuf - coding->charbuf;
4461 }
4462
4463 static int
4464 encode_coding_ccl (coding)
4465      struct coding_system *coding;
4466 {
4467   struct ccl_program ccl;
4468   int multibytep = coding->dst_multibyte;
4469   int *charbuf = coding->charbuf;
4470   int *charbuf_end = charbuf + coding->charbuf_used;
4471   unsigned char *dst = coding->destination + coding->produced;
4472   unsigned char *dst_end = coding->destination + coding->dst_bytes;
4473   unsigned char *adjusted_dst_end = dst_end - 1;
4474   int destination_charbuf[1024];
4475   int i, produced_chars = 0;
4476   Lisp_Object attrs, charset_list;
4477
4478   CODING_GET_INFO (coding, attrs, charset_list);
4479   setup_ccl_program (&ccl, CODING_CCL_ENCODER (coding));
4480
4481   ccl.last_block = coding->mode & CODING_MODE_LAST_BLOCK;
4482   ccl.dst_multibyte = coding->dst_multibyte;
4483
4484   while (charbuf < charbuf_end && dst < adjusted_dst_end)
4485     {
4486       int dst_bytes = dst_end - dst;
4487       if (dst_bytes > 1024)
4488         dst_bytes = 1024;
4489
4490       ccl_driver (&ccl, charbuf, destination_charbuf,
4491                   charbuf_end - charbuf, dst_bytes, charset_list);
4492       charbuf += ccl.consumed;
4493       if (multibytep)
4494         for (i = 0; i < ccl.produced; i++)
4495           EMIT_ONE_BYTE (destination_charbuf[i] & 0xFF);
4496       else
4497         {
4498           for (i = 0; i < ccl.produced; i++)
4499             *dst++ = destination_charbuf[i] & 0xFF;
4500           produced_chars += ccl.produced;
4501         }
4502     }
4503
4504   switch (ccl.status)
4505     {
4506     case CCL_STAT_SUSPEND_BY_SRC:
4507       record_conversion_result (coding, CODING_RESULT_INSUFFICIENT_SRC);
4508       break;
4509     case CCL_STAT_SUSPEND_BY_DST:
4510       record_conversion_result (coding, CODING_RESULT_INSUFFICIENT_DST);
4511       break;
4512     case CCL_STAT_QUIT:
4513     case CCL_STAT_INVALID_CMD:
4514       record_conversion_result (coding, CODING_RESULT_INTERRUPT);
4515       break;
4516     default:
4517       record_conversion_result (coding, CODING_RESULT_SUCCESS);
4518       break;
4519     }
4520
4521   coding->produced_char += produced_chars;
4522   coding->produced = dst - coding->destination;
4523   return 0;
4524 }
4525
4526
4527 \f
4528 /*** 10, 11. no-conversion handlers ***/
4529
4530 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".  */
4531
4532 static void
4533 decode_coding_raw_text (coding)
4534      struct coding_system *coding;
4535 {
4536   coding->chars_at_source = 1;
4537   coding->consumed_char = 0;
4538   coding->consumed = 0;
4539   record_conversion_result (coding, CODING_RESULT_SUCCESS);
4540 }
4541
4542 static int
4543 encode_coding_raw_text (coding)
4544      struct coding_system *coding;
4545 {
4546   int multibytep = coding->dst_multibyte;
4547   int *charbuf = coding->charbuf;
4548   int *charbuf_end = coding->charbuf + coding->charbuf_used;
4549   unsigned char *dst = coding->destination + coding->produced;
4550   unsigned char *dst_end = coding->destination + coding->dst_bytes;
4551   int produced_chars = 0;
4552   int c;
4553
4554   if (multibytep)
4555     {
4556       int safe_room = MAX_MULTIBYTE_LENGTH * 2;
4557
4558       if (coding->src_multibyte)
4559         while (charbuf < charbuf_end)
4560           {
4561             ASSURE_DESTINATION (safe_room);
4562             c = *charbuf++;
4563             if (ASCII_CHAR_P (c))
4564               EMIT_ONE_ASCII_BYTE (c);
4565             else if (CHAR_BYTE8_P (c))
4566               {
4567                 c = CHAR_TO_BYTE8 (c);
4568                 EMIT_ONE_BYTE (c);
4569               }
4570             else
4571               {
4572                 unsigned char str[MAX_MULTIBYTE_LENGTH], *p0 = str, *p1 = str;
4573
4574                 CHAR_STRING_ADVANCE (c, p1);
4575                 while (p0 < p1)
4576                   {
4577                     EMIT_ONE_BYTE (*p0);
4578                     p0++;
4579                   }
4580               }
4581           }
4582       else
4583         while (charbuf < charbuf_end)
4584           {
4585             ASSURE_DESTINATION (safe_room);
4586             c = *charbuf++;
4587             EMIT_ONE_BYTE (c);
4588           }
4589     }
4590   else
4591     {
4592       if (coding->src_multibyte)
4593         {
4594           int safe_room = MAX_MULTIBYTE_LENGTH;
4595
4596           while (charbuf < charbuf_end)
4597             {
4598               ASSURE_DESTINATION (safe_room);
4599               c = *charbuf++;
4600               if (ASCII_CHAR_P (c))
4601                 *dst++ = c;
4602               else if (CHAR_BYTE8_P (c))
4603                 *dst++ = CHAR_TO_BYTE8 (c);
4604               else
4605                 CHAR_STRING_ADVANCE (c, dst);
4606               produced_chars++;
4607             }
4608         }
4609       else
4610         {
4611           ASSURE_DESTINATION (charbuf_end - charbuf);
4612           while (charbuf < charbuf_end && dst < dst_end)
4613             *dst++ = *charbuf++;
4614           produced_chars = dst - (coding->destination + coding->dst_bytes);
4615         }
4616     }
4617   record_conversion_result (coding, CODING_RESULT_SUCCESS);
4618   coding->produced_char += produced_chars;
4619   coding->produced = dst - coding->destination;
4620   return 0;
4621 }
4622
4623 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
4624    Check if a text is encoded in a charset-based coding system.  If it
4625    is, return 1, else return 0.  */
4626
4627 static int
4628 detect_coding_charset (coding, detect_info)
4629      struct coding_system *coding;
4630      struct coding_detection_info *detect_info;
4631 {
4632   const unsigned char *src = coding->source, *src_base;
4633   const unsigned char *src_end = coding->source + coding->src_bytes;
4634   int multibytep = coding->src_multibyte;
4635   int consumed_chars = 0;
4636   Lisp_Object attrs, valids;
4637   int found = 0;
4638
4639   detect_info->checked |= CATEGORY_MASK_CHARSET;
4640
4641   coding = &coding_categories[coding_category_charset];
4642   attrs = CODING_ID_ATTRS (coding->id);
4643   valids = AREF (attrs, coding_attr_charset_valids);
4644
4645   if (! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
4646     src += coding->head_ascii;
4647
4648   while (1)
4649     {
4650       int c;
4651
4652       src_base = src;
4653       ONE_MORE_BYTE (c);
4654       if (c < 0)
4655         continue;
4656       if (NILP (AREF (valids, c)))
4657         break;
4658       if (c >= 0x80)
4659         found = CATEGORY_MASK_CHARSET;
4660     }
4661   detect_info->rejected |= CATEGORY_MASK_CHARSET;
4662   return 0;
4663
4664  no_more_source:
4665   detect_info->found |= found;
4666   return 1;
4667 }
4668
4669 static void
4670 decode_coding_charset (coding)
4671      struct coding_system *coding;
4672 {
4673   const unsigned char *src = coding->source + coding->consumed;
4674   const unsigned char *src_end = coding->source + coding->src_bytes;
4675   const unsigned char *src_base;
4676   int *charbuf = coding->charbuf + coding->charbuf_used;
4677   int *charbuf_end
4678     = coding->charbuf + coding->charbuf_size - MAX_ANNOTATION_LENGTH;
4679   int consumed_chars = 0, consumed_chars_base;
4680   int multibytep = coding->src_multibyte;
4681   Lisp_Object attrs, charset_list, valids;
4682   int char_offset = coding->produced_char;
4683   int last_offset = char_offset;
4684   int last_id = charset_ascii;
4685
4686   CODING_GET_INFO (coding, attrs, charset_list);
4687   valids = AREF (attrs, coding_attr_charset_valids);
4688
4689   while (1)
4690     {
4691       int c;
4692       Lisp_Object val;
4693       struct charset *charset;
4694       int dim;
4695       int len = 1;
4696       unsigned code;
4697
4698       src_base = src;
4699       consumed_chars_base = consumed_chars;
4700
4701       if (charbuf >= charbuf_end)
4702         break;
4703
4704       ONE_MORE_BYTE (c);
4705       if (c < 0)
4706         goto invalid_code;
4707       code = c;
4708
4709       val = AREF (valids, c);
4710       if (NILP (val))
4711         goto invalid_code;
4712       if (INTEGERP (val))
4713         {
4714           charset = CHARSET_FROM_ID (XFASTINT (val));
4715           dim = CHARSET_DIMENSION (charset);
4716           while (len < dim)
4717             {
4718               ONE_MORE_BYTE (c);
4719               code = (code << 8) | c;
4720               len++;
4721             }
4722           CODING_DECODE_CHAR (coding, src, src_base, src_end,
4723                               charset, code, c);
4724         }
4725       else
4726         {
4727           /* VAL is a list of charset IDs.  It is assured that the
4728              list is sorted by charset dimensions (smaller one
4729              comes first).  */
4730           while (CONSP (val))
4731             {
4732               charset = CHARSET_FROM_ID (XFASTINT (XCAR (val)));
4733               dim = CHARSET_DIMENSION (charset);
4734               while (len < dim)
4735                 {
4736                   ONE_MORE_BYTE (c);
4737                   code = (code << 8) | c;
4738                   len++;
4739                 }
4740               CODING_DECODE_CHAR (coding, src, src_base,
4741                                   src_end, charset, code, c);
4742               if (c >= 0)
4743                 break;
4744               val = XCDR (val);
4745             }
4746         }
4747       if (c < 0)
4748         goto invalid_code;
4749       if (charset->id != charset_ascii
4750           && last_id != charset->id)
4751         {
4752           if (last_id != charset_ascii)
4753             ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
4754           last_id = charset->id;
4755           last_offset = char_offset;
4756         }
4757
4758       *charbuf++ = c;
4759       char_offset++;
4760       continue;
4761
4762     invalid_code:
4763       src = src_base;
4764       consumed_chars = consumed_chars_base;
4765       ONE_MORE_BYTE (c);
4766       *charbuf++ = c < 0 ? -c : ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
4767       char_offset++;
4768       coding->errors++;
4769     }
4770
4771  no_more_source:
4772   if (last_id != charset_ascii)
4773     ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
4774   coding->consumed_char += consumed_chars_base;
4775   coding->consumed = src_base - coding->source;
4776   coding->charbuf_used = charbuf - coding->charbuf;
4777 }
4778
4779 static int
4780 encode_coding_charset (coding)
4781      struct coding_system *coding;
4782 {
4783   int multibytep = coding->dst_multibyte;
4784   int *charbuf = coding->charbuf;
4785   int *charbuf_end = charbuf + coding->charbuf_used;
4786   unsigned char *dst = coding->destination + coding->produced;
4787   unsigned char *dst_end = coding->destination + coding->dst_bytes;
4788   int safe_room = MAX_MULTIBYTE_LENGTH;
4789   int produced_chars = 0;
4790   Lisp_Object attrs, charset_list;
4791   int ascii_compatible;
4792   int c;
4793
4794   CODING_GET_INFO (coding, attrs, charset_list);
4795   ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
4796
4797   while (charbuf < charbuf_end)
4798     {
4799       struct charset *charset;
4800       unsigned code;
4801
4802       ASSURE_DESTINATION (safe_room);
4803       c = *charbuf++;
4804       if (ascii_compatible && ASCII_CHAR_P (c))
4805         EMIT_ONE_ASCII_BYTE (c);
4806       else if (CHAR_BYTE8_P (c))
4807         {
4808           c = CHAR_TO_BYTE8 (c);
4809           EMIT_ONE_BYTE (c);
4810         }
4811       else
4812         {
4813           charset = char_charset (c, charset_list, &code);
4814           if (charset)
4815             {
4816               if (CHARSET_DIMENSION (charset) == 1)
4817                 EMIT_ONE_BYTE (code);
4818               else if (CHARSET_DIMENSION (charset) == 2)
4819                 EMIT_TWO_BYTES (code >> 8, code & 0xFF);
4820               else if (CHARSET_DIMENSION (charset) == 3)
4821                 EMIT_THREE_BYTES (code >> 16, (code >> 8) & 0xFF, code & 0xFF);
4822               else
4823                 EMIT_FOUR_BYTES (code >> 24, (code >> 16) & 0xFF,
4824                                  (code >> 8) & 0xFF, code & 0xFF);
4825             }
4826           else
4827             {
4828               if (coding->mode & CODING_MODE_SAFE_ENCODING)
4829                 c = CODING_INHIBIT_CHARACTER_SUBSTITUTION;
4830               else
4831                 c = coding->default_char;
4832               EMIT_ONE_BYTE (c);
4833             }
4834         }
4835     }
4836
4837   record_conversion_result (coding, CODING_RESULT_SUCCESS);
4838   coding->produced_char += produced_chars;
4839   coding->produced = dst - coding->destination;
4840   return 0;
4841 }
4842
4843 \f
4844 /*** 7. C library functions ***/
4845
4846 /* Setup coding context CODING from information about CODING_SYSTEM.
4847    If CODING_SYSTEM is nil, `no-conversion' is assumed.  If
4848    CODING_SYSTEM is invalid, signal an error.  */
4849
4850 void
4851 setup_coding_system (coding_system, coding)
4852      Lisp_Object coding_system;
4853      struct coding_system *coding;
4854 {
4855   Lisp_Object attrs;
4856   Lisp_Object eol_type;
4857   Lisp_Object coding_type;
4858   Lisp_Object val;
4859
4860   if (NILP (coding_system))
4861     coding_system = Qno_conversion;
4862
4863   CHECK_CODING_SYSTEM_GET_ID (coding_system, coding->id);
4864
4865   attrs = CODING_ID_ATTRS (coding->id);
4866   eol_type = CODING_ID_EOL_TYPE (coding->id);
4867
4868   coding->mode = 0;
4869   coding->head_ascii = -1;
4870   coding->common_flags
4871     = (VECTORP (eol_type) ? CODING_REQUIRE_DETECTION_MASK : 0);
4872   if (! NILP (CODING_ATTR_POST_READ (attrs)))
4873     coding->common_flags |= CODING_REQUIRE_DECODING_MASK;
4874   if (! NILP (CODING_ATTR_PRE_WRITE (attrs)))
4875     coding->common_flags |= CODING_REQUIRE_ENCODING_MASK;
4876   if (! NILP (CODING_ATTR_FOR_UNIBYTE (attrs)))
4877     coding->common_flags |= CODING_FOR_UNIBYTE_MASK;
4878
4879   val = CODING_ATTR_SAFE_CHARSETS (attrs);
4880   coding->max_charset_id = SCHARS (val) - 1;
4881   coding->safe_charsets = (char *) SDATA (val);
4882   coding->default_char = XINT (CODING_ATTR_DEFAULT_CHAR (attrs));
4883
4884   coding_type = CODING_ATTR_TYPE (attrs);
4885   if (EQ (coding_type, Qundecided))
4886     {
4887       coding->detector = NULL;
4888       coding->decoder = decode_coding_raw_text;
4889       coding->encoder = encode_coding_raw_text;
4890       coding->common_flags |= CODING_REQUIRE_DETECTION_MASK;
4891     }
4892   else if (EQ (coding_type, Qiso_2022))
4893     {
4894       int i;
4895       int flags = XINT (AREF (attrs, coding_attr_iso_flags));
4896
4897       /* Invoke graphic register 0 to plane 0.  */
4898       CODING_ISO_INVOCATION (coding, 0) = 0;
4899       /* Invoke graphic register 1 to plane 1 if we can use 8-bit.  */
4900       CODING_ISO_INVOCATION (coding, 1)
4901         = (flags & CODING_ISO_FLAG_SEVEN_BITS ? -1 : 1);
4902       /* Setup the initial status of designation.  */
4903       for (i = 0; i < 4; i++)
4904         CODING_ISO_DESIGNATION (coding, i) = CODING_ISO_INITIAL (coding, i);
4905       /* Not single shifting initially.  */
4906       CODING_ISO_SINGLE_SHIFTING (coding) = 0;
4907       /* Beginning of buffer should also be regarded as bol. */
4908       CODING_ISO_BOL (coding) = 1;
4909       coding->detector = detect_coding_iso_2022;
4910       coding->decoder = decode_coding_iso_2022;
4911       coding->encoder = encode_coding_iso_2022;
4912       if (flags & CODING_ISO_FLAG_SAFE)
4913         coding->mode |= CODING_MODE_SAFE_ENCODING;
4914       coding->common_flags
4915         |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK
4916             | CODING_REQUIRE_FLUSHING_MASK);
4917       if (flags & CODING_ISO_FLAG_COMPOSITION)
4918         coding->common_flags |= CODING_ANNOTATE_COMPOSITION_MASK;
4919       if (flags & CODING_ISO_FLAG_DESIGNATION)
4920         coding->common_flags |= CODING_ANNOTATE_CHARSET_MASK;
4921       if (flags & CODING_ISO_FLAG_FULL_SUPPORT)
4922         {
4923           setup_iso_safe_charsets (attrs);
4924           val = CODING_ATTR_SAFE_CHARSETS (attrs);
4925           coding->max_charset_id = SCHARS (val) - 1;
4926           coding->safe_charsets = (char *) SDATA (val);
4927         }
4928       CODING_ISO_FLAGS (coding) = flags;
4929     }
4930   else if (EQ (coding_type, Qcharset))
4931     {
4932       coding->detector = detect_coding_charset;
4933       coding->decoder = decode_coding_charset;
4934       coding->encoder = encode_coding_charset;
4935       coding->common_flags
4936         |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
4937     }
4938   else if (EQ (coding_type, Qutf_8))
4939     {
4940       coding->detector = detect_coding_utf_8;
4941       coding->decoder = decode_coding_utf_8;
4942       coding->encoder = encode_coding_utf_8;
4943       coding->common_flags
4944         |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
4945     }
4946   else if (EQ (coding_type, Qutf_16))
4947     {
4948       val = AREF (attrs, coding_attr_utf_16_bom);
4949       CODING_UTF_16_BOM (coding) = (CONSP (val) ? utf_16_detect_bom
4950                                     : EQ (val, Qt) ? utf_16_with_bom
4951                                     : utf_16_without_bom);
4952       val = AREF (attrs, coding_attr_utf_16_endian);
4953       CODING_UTF_16_ENDIAN (coding) = (EQ (val, Qbig) ? utf_16_big_endian
4954                                        : utf_16_little_endian);
4955       CODING_UTF_16_SURROGATE (coding) = 0;
4956       coding->detector = detect_coding_utf_16;
4957       coding->decoder = decode_coding_utf_16;
4958       coding->encoder = encode_coding_utf_16;
4959       coding->common_flags
4960         |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
4961       if (CODING_UTF_16_BOM (coding) == utf_16_detect_bom)
4962         coding->common_flags |= CODING_REQUIRE_DETECTION_MASK;
4963     }
4964   else if (EQ (coding_type, Qccl))
4965     {
4966       coding->detector = detect_coding_ccl;
4967       coding->decoder = decode_coding_ccl;
4968       coding->encoder = encode_coding_ccl;
4969       coding->common_flags
4970         |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK
4971             | CODING_REQUIRE_FLUSHING_MASK);
4972     }
4973   else if (EQ (coding_type, Qemacs_mule))
4974     {
4975       coding->detector = detect_coding_emacs_mule;
4976       coding->decoder = decode_coding_emacs_mule;
4977       coding->encoder = encode_coding_emacs_mule;
4978       coding->common_flags
4979         |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
4980       if (! NILP (AREF (attrs, coding_attr_emacs_mule_full))
4981           && ! EQ (CODING_ATTR_CHARSET_LIST (attrs), Vemacs_mule_charset_list))
4982         {
4983           Lisp_Object tail, safe_charsets;
4984           int max_charset_id = 0;
4985
4986           for (tail = Vemacs_mule_charset_list; CONSP (tail);
4987                tail = XCDR (tail))
4988             if (max_charset_id < XFASTINT (XCAR (tail)))
4989               max_charset_id = XFASTINT (XCAR (tail));
4990           safe_charsets = Fmake_string (make_number (max_charset_id + 1),
4991                                         make_number (255));
4992           for (tail = Vemacs_mule_charset_list; CONSP (tail);
4993                tail = XCDR (tail))
4994             SSET (safe_charsets, XFASTINT (XCAR (tail)), 0);
4995           coding->max_charset_id = max_charset_id;
4996           coding->safe_charsets = (char *) SDATA (safe_charsets);
4997         }
4998     }
4999   else if (EQ (coding_type, Qshift_jis))
5000     {
5001       coding->detector = detect_coding_sjis;
5002       coding->decoder = decode_coding_sjis;
5003       coding->encoder = encode_coding_sjis;
5004       coding->common_flags
5005         |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
5006     }
5007   else if (EQ (coding_type, Qbig5))
5008     {
5009       coding->detector = detect_coding_big5;
5010       coding->decoder = decode_coding_big5;
5011       coding->encoder = encode_coding_big5;
5012       coding->common_flags
5013         |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
5014     }
5015   else                          /* EQ (coding_type, Qraw_text) */
5016     {
5017       coding->detector = NULL;
5018       coding->decoder = decode_coding_raw_text;
5019       coding->encoder = encode_coding_raw_text;
5020     }
5021
5022   return;
5023 }
5024
5025 /* Return raw-text or one of its subsidiaries that has the same
5026    eol_type as CODING-SYSTEM.  */
5027
5028 Lisp_Object
5029 raw_text_coding_system (coding_system)
5030      Lisp_Object coding_system;
5031 {
5032   Lisp_Object spec, attrs;
5033   Lisp_Object eol_type, raw_text_eol_type;
5034
5035   if (NILP (coding_system))
5036     return Qraw_text;
5037   spec = CODING_SYSTEM_SPEC (coding_system);
5038   attrs = AREF (spec, 0);
5039
5040   if (EQ (CODING_ATTR_TYPE (attrs), Qraw_text))
5041     return coding_system;
5042
5043   eol_type = AREF (spec, 2);
5044   if (VECTORP (eol_type))
5045     return Qraw_text;
5046   spec = CODING_SYSTEM_SPEC (Qraw_text);
5047   raw_text_eol_type = AREF (spec, 2);
5048   return (EQ (eol_type, Qunix) ? AREF (raw_text_eol_type, 0)
5049           : EQ (eol_type, Qdos) ? AREF (raw_text_eol_type, 1)
5050           : AREF (raw_text_eol_type, 2));
5051 }
5052
5053
5054 /* If CODING_SYSTEM doesn't specify end-of-line format but PARENT
5055    does, return one of the subsidiary that has the same eol-spec as
5056    PARENT.  Otherwise, return CODING_SYSTEM.  */
5057
5058 Lisp_Object
5059 coding_inherit_eol_type (coding_system, parent)
5060      Lisp_Object coding_system, parent;
5061 {
5062   Lisp_Object spec, eol_type;
5063
5064   if (NILP (coding_system))
5065     coding_system = Qraw_text;
5066   spec = CODING_SYSTEM_SPEC (coding_system);
5067   eol_type = AREF (spec, 2);
5068   if (VECTORP (eol_type)
5069       && ! NILP (parent))
5070     {
5071       Lisp_Object parent_spec;
5072       Lisp_Object parent_eol_type;
5073
5074       parent_spec
5075         = CODING_SYSTEM_SPEC (buffer_defaults.buffer_file_coding_system);
5076       parent_eol_type = AREF (parent_spec, 2);
5077       if (EQ (parent_eol_type, Qunix))
5078         coding_system = AREF (eol_type, 0);
5079       else if (EQ (parent_eol_type, Qdos))
5080         coding_system = AREF (eol_type, 1);
5081       else if (EQ (parent_eol_type, Qmac))
5082         coding_system = AREF (eol_type, 2);
5083     }
5084   return coding_system;
5085 }
5086
5087 /* Emacs has a mechanism to automatically detect a coding system if it
5088    is one of Emacs' internal format, ISO2022, SJIS, and BIG5.  But,
5089    it's impossible to distinguish some coding systems accurately
5090    because they use the same range of codes.  So, at first, coding
5091    systems are categorized into 7, those are:
5092
5093    o coding-category-emacs-mule
5094
5095         The category for a coding system which has the same code range
5096         as Emacs' internal format.  Assigned the coding-system (Lisp
5097         symbol) `emacs-mule' by default.
5098
5099    o coding-category-sjis
5100
5101         The category for a coding system which has the same code range
5102         as SJIS.  Assigned the coding-system (Lisp
5103         symbol) `japanese-shift-jis' by default.
5104
5105    o coding-category-iso-7
5106
5107         The category for a coding system which has the same code range
5108         as ISO2022 of 7-bit environment.  This doesn't use any locking
5109         shift and single shift functions.  This can encode/decode all
5110         charsets.  Assigned the coding-system (Lisp symbol)
5111         `iso-2022-7bit' by default.
5112
5113    o coding-category-iso-7-tight
5114
5115         Same as coding-category-iso-7 except that this can
5116         encode/decode only the specified charsets.
5117
5118    o coding-category-iso-8-1
5119
5120         The category for a coding system which has the same code range
5121         as ISO2022 of 8-bit environment and graphic plane 1 used only
5122         for DIMENSION1 charset.  This doesn't use any locking shift
5123         and single shift functions.  Assigned the coding-system (Lisp
5124         symbol) `iso-latin-1' by default.
5125
5126    o coding-category-iso-8-2
5127
5128         The category for a coding system which has the same code range
5129         as ISO2022 of 8-bit environment and graphic plane 1 used only
5130         for DIMENSION2 charset.  This doesn't use any locking shift
5131         and single shift functions.  Assigned the coding-system (Lisp
5132         symbol) `japanese-iso-8bit' by default.
5133
5134    o coding-category-iso-7-else
5135
5136         The category for a coding system which has the same code range
5137         as ISO2022 of 7-bit environemnt but uses locking shift or
5138         single shift functions.  Assigned the coding-system (Lisp
5139         symbol) `iso-2022-7bit-lock' by default.
5140
5141    o coding-category-iso-8-else
5142
5143         The category for a coding system which has the same code range
5144         as ISO2022 of 8-bit environemnt but uses locking shift or
5145         single shift functions.  Assigned the coding-system (Lisp
5146         symbol) `iso-2022-8bit-ss2' by default.
5147
5148    o coding-category-big5
5149
5150         The category for a coding system which has the same code range
5151         as BIG5.  Assigned the coding-system (Lisp symbol)
5152         `cn-big5' by default.
5153
5154    o coding-category-utf-8
5155
5156         The category for a coding system which has the same code range
5157         as UTF-8 (cf. RFC3629).  Assigned the coding-system (Lisp
5158         symbol) `utf-8' by default.
5159
5160    o coding-category-utf-16-be
5161
5162         The category for a coding system in which a text has an
5163         Unicode signature (cf. Unicode Standard) in the order of BIG
5164         endian at the head.  Assigned the coding-system (Lisp symbol)
5165         `utf-16-be' by default.
5166
5167    o coding-category-utf-16-le
5168
5169         The category for a coding system in which a text has an
5170         Unicode signature (cf. Unicode Standard) in the order of
5171         LITTLE endian at the head.  Assigned the coding-system (Lisp
5172         symbol) `utf-16-le' by default.
5173
5174    o coding-category-ccl
5175
5176         The category for a coding system of which encoder/decoder is
5177         written in CCL programs.  The default value is nil, i.e., no
5178         coding system is assigned.
5179
5180    o coding-category-binary
5181
5182         The category for a coding system not categorized in any of the
5183         above.  Assigned the coding-system (Lisp symbol)
5184         `no-conversion' by default.
5185
5186    Each of them is a Lisp symbol and the value is an actual
5187    `coding-system's (this is also a Lisp symbol) assigned by a user.
5188    What Emacs does actually is to detect a category of coding system.
5189    Then, it uses a `coding-system' assigned to it.  If Emacs can't
5190    decide only one possible category, it selects a category of the
5191    highest priority.  Priorities of categories are also specified by a
5192    user in a Lisp variable `coding-category-list'.
5193
5194 */
5195
5196 #define EOL_SEEN_NONE   0
5197 #define EOL_SEEN_LF     1
5198 #define EOL_SEEN_CR     2
5199 #define EOL_SEEN_CRLF   4
5200
5201 /* Detect how end-of-line of a text of length SRC_BYTES pointed by
5202    SOURCE is encoded.  If CATEGORY is one of
5203    coding_category_utf_16_XXXX, assume that CR and LF are encoded by
5204    two-byte, else they are encoded by one-byte.
5205
5206    Return one of EOL_SEEN_XXX.  */
5207
5208 #define MAX_EOL_CHECK_COUNT 3
5209
5210 static int
5211 detect_eol (source, src_bytes, category)
5212      unsigned char *source;
5213      EMACS_INT src_bytes;
5214      enum coding_category category;
5215 {
5216   unsigned char *src = source, *src_end = src + src_bytes;
5217   unsigned char c;
5218   int total  = 0;
5219   int eol_seen = EOL_SEEN_NONE;
5220
5221   if ((1 << category) & CATEGORY_MASK_UTF_16)
5222     {
5223       int msb, lsb;
5224
5225       msb = category == (coding_category_utf_16_le
5226                          | coding_category_utf_16_le_nosig);
5227       lsb = 1 - msb;
5228
5229       while (src + 1 < src_end)
5230         {
5231           c = src[lsb];
5232           if (src[msb] == 0 && (c == '\n' || c == '\r'))
5233             {
5234               int this_eol;
5235
5236               if (c == '\n')
5237                 this_eol = EOL_SEEN_LF;
5238               else if (src + 3 >= src_end
5239                        || src[msb + 2] != 0
5240                        || src[lsb + 2] != '\n')
5241                 this_eol = EOL_SEEN_CR;
5242               else
5243                 this_eol = EOL_SEEN_CRLF;
5244
5245               if (eol_seen == EOL_SEEN_NONE)
5246                 /* This is the first end-of-line.  */
5247                 eol_seen = this_eol;
5248               else if (eol_seen != this_eol)
5249                 {
5250                   /* The found type is different from what found before.  */
5251                   eol_seen = EOL_SEEN_LF;
5252                   break;
5253                 }
5254               if (++total == MAX_EOL_CHECK_COUNT)
5255                 break;
5256             }
5257           src += 2;
5258         }
5259     }
5260   else
5261     {
5262       while (src < src_end)
5263         {
5264           c = *src++;
5265           if (c == '\n' || c == '\r')
5266             {
5267               int this_eol;
5268
5269               if (c == '\n')
5270                 this_eol = EOL_SEEN_LF;
5271               else if (src >= src_end || *src != '\n')
5272                 this_eol = EOL_SEEN_CR;
5273               else
5274                 this_eol = EOL_SEEN_CRLF, src++;
5275
5276               if (eol_seen == EOL_SEEN_NONE)
5277                 /* This is the first end-of-line.  */
5278                 eol_seen = this_eol;
5279               else if (eol_seen != this_eol)
5280                 {
5281                   /* The found type is different from what found before.  */
5282                   eol_seen = EOL_SEEN_LF;
5283                   break;
5284                 }
5285               if (++total == MAX_EOL_CHECK_COUNT)
5286                 break;
5287             }
5288         }
5289     }
5290   return eol_seen;
5291 }
5292
5293
5294 static Lisp_Object
5295 adjust_coding_eol_type (coding, eol_seen)
5296      struct coding_system *coding;
5297      int eol_seen;
5298 {
5299   Lisp_Object eol_type;
5300
5301   eol_type = CODING_ID_EOL_TYPE (coding->id);
5302   if (eol_seen & EOL_SEEN_LF)
5303     {
5304       coding->id = CODING_SYSTEM_ID (AREF (eol_type, 0));
5305       eol_type = Qunix;
5306     }
5307   else if (eol_seen & EOL_SEEN_CRLF)
5308     {
5309       coding->id = CODING_SYSTEM_ID (AREF (eol_type, 1));
5310       eol_type = Qdos;
5311     }
5312   else if (eol_seen & EOL_SEEN_CR)
5313     {
5314       coding->id = CODING_SYSTEM_ID (AREF (eol_type, 2));
5315       eol_type = Qmac;
5316     }
5317   return eol_type;
5318 }
5319
5320 /* Detect how a text specified in CODING is encoded.  If a coding
5321    system is detected, update fields of CODING by the detected coding
5322    system.  */
5323
5324 void
5325 detect_coding (coding)
5326      struct coding_system *coding;
5327 {
5328   const unsigned char *src, *src_end;
5329   Lisp_Object attrs, coding_type;
5330
5331   coding->consumed = coding->consumed_char = 0;
5332   coding->produced = coding->produced_char = 0;
5333   coding_set_source (coding);
5334
5335   src_end = coding->source + coding->src_bytes;
5336
5337   /* If we have not yet decided the text encoding type, detect it
5338      now.  */
5339   if (EQ (CODING_ATTR_TYPE (CODING_ID_ATTRS (coding->id)), Qundecided))
5340     {
5341       int c, i;
5342
5343       for (i = 0, src = coding->source; src < src_end; i++, src++)
5344         {
5345           c = *src;
5346           if (c & 0x80 || (c < 0x20 && (c == ISO_CODE_ESC
5347                                         || c == ISO_CODE_SI
5348                                         || c == ISO_CODE_SO)))
5349             break;
5350         }
5351       coding->head_ascii = src - (coding->source + coding->consumed);
5352
5353       if (coding->head_ascii < coding->src_bytes)
5354         {
5355           struct coding_detection_info detect_info;
5356           enum coding_category category;
5357           struct coding_system *this;
5358
5359           detect_info.checked = detect_info.found = detect_info.rejected = 0;
5360           for (i = 0; i < coding_category_raw_text; i++)
5361             {
5362               category = coding_priorities[i];
5363               this = coding_categories + category;
5364               if (this->id < 0)
5365                 {
5366                   /* No coding system of this category is defined.  */
5367                   detect_info.rejected |= (1 << category);
5368                 }
5369               else if (category >= coding_category_raw_text)
5370                 continue;
5371               else if (detect_info.checked & (1 << category))
5372                 {
5373                   if (detect_info.found & (1 << category))
5374                     break;
5375                 }
5376               else if ((*(this->detector)) (coding, &detect_info)
5377                        && detect_info.found & (1 << category))
5378                 {
5379                   if (category == coding_category_utf_16_auto)
5380                     {
5381                       if (detect_info.found & CATEGORY_MASK_UTF_16_LE)
5382                         category = coding_category_utf_16_le;
5383                       else
5384                         category = coding_category_utf_16_be;
5385                     }
5386                   break;
5387                 }
5388             }
5389           if (i < coding_category_raw_text)
5390             setup_coding_system (CODING_ID_NAME (this->id), coding);
5391           else if (detect_info.rejected == CATEGORY_MASK_ANY)
5392             setup_coding_system (Qraw_text, coding);
5393           else if (detect_info.rejected)
5394             for (i = 0; i < coding_category_raw_text; i++)
5395               if (! (detect_info.rejected & (1 << coding_priorities[i])))
5396                 {
5397                   this = coding_categories + coding_priorities[i];
5398                   setup_coding_system (CODING_ID_NAME (this->id), coding);
5399                   break;
5400                 }
5401         }
5402     }
5403   else if (XINT (CODING_ATTR_CATEGORY (CODING_ID_ATTRS (coding->id)))
5404            == coding_category_utf_16_auto)
5405     {
5406       Lisp_Object coding_systems;
5407       struct coding_detection_info detect_info;
5408
5409       coding_systems
5410         = AREF (CODING_ID_ATTRS (coding->id), coding_attr_utf_16_bom);
5411       detect_info.found = detect_info.rejected = 0;
5412       if (CONSP (coding_systems)
5413           && detect_coding_utf_16 (coding, &detect_info))
5414         {
5415           if (detect_info.found & CATEGORY_MASK_UTF_16_LE)
5416             setup_coding_system (XCAR (coding_systems), coding);
5417           else if (detect_info.found & CATEGORY_MASK_UTF_16_BE)
5418             setup_coding_system (XCDR (coding_systems), coding);
5419         }
5420     }
5421 }
5422
5423
5424 static void
5425 decode_eol (coding)
5426      struct coding_system *coding;
5427 {
5428   Lisp_Object eol_type;
5429   unsigned char *p, *pbeg, *pend;
5430
5431   eol_type = CODING_ID_EOL_TYPE (coding->id);
5432   if (EQ (eol_type, Qunix))
5433     return;
5434
5435   if (NILP (coding->dst_object))
5436     pbeg = coding->destination;
5437   else
5438     pbeg = BYTE_POS_ADDR (coding->dst_pos_byte);
5439   pend = pbeg + coding->produced;
5440
5441   if (VECTORP (eol_type))
5442     {
5443       int eol_seen = EOL_SEEN_NONE;
5444
5445       for (p = pbeg; p < pend; p++)
5446         {
5447           if (*p == '\n')
5448             eol_seen |= EOL_SEEN_LF;
5449           else if (*p == '\r')
5450             {
5451               if (p + 1 < pend && *(p + 1) == '\n')
5452                 {
5453                   eol_seen |= EOL_SEEN_CRLF;
5454                   p++;
5455                 }
5456               else
5457                 eol_seen |= EOL_SEEN_CR;
5458             }
5459         }
5460       if (eol_seen != EOL_SEEN_NONE
5461           && eol_seen != EOL_SEEN_LF
5462           && eol_seen != EOL_SEEN_CRLF
5463           && eol_seen != EOL_SEEN_CR)
5464         eol_seen = EOL_SEEN_LF;
5465       if (eol_seen != EOL_SEEN_NONE)
5466         eol_type = adjust_coding_eol_type (coding, eol_seen);
5467     }
5468
5469   if (EQ (eol_type, Qmac))
5470     {
5471       for (p = pbeg; p < pend; p++)
5472         if (*p == '\r')
5473           *p = '\n';
5474     }
5475   else if (EQ (eol_type, Qdos))
5476     {
5477       int n = 0;
5478
5479       if (NILP (coding->dst_object))
5480         {
5481           for (p = pend - 2; p >= pbeg; p--)
5482             if (*p == '\r')
5483               {
5484                 safe_bcopy ((char *) (p + 1), (char *) p, pend-- - p - 1);
5485                 n++;
5486               }
5487         }
5488       else
5489         {
5490           for (p = pend - 2; p >= pbeg; p--)
5491             if (*p == '\r')
5492               {
5493                 int pos_byte = coding->dst_pos_byte + (p - pbeg);
5494                 int pos = BYTE_TO_CHAR (pos_byte);
5495
5496                 del_range_2 (pos, pos_byte, pos + 1, pos_byte + 1, 0);
5497                 n++;
5498               }
5499         }
5500       coding->produced -= n;
5501       coding->produced_char -= n;
5502     }
5503 }
5504
5505
5506 /* Return a translation table (or list of them) from coding system
5507    attribute vector ATTRS for encoding (ENCODEP is nonzero) or
5508    decoding (ENCODEP is zero). */
5509
5510 static Lisp_Object
5511 get_translation_table (attrs, encodep, max_lookup)
5512      Lisp_Object attrs;
5513      int encodep, *max_lookup;
5514 {
5515   Lisp_Object standard, translation_table;
5516   Lisp_Object val;
5517
5518   if (encodep)
5519     translation_table = CODING_ATTR_ENCODE_TBL (attrs),
5520       standard = Vstandard_translation_table_for_encode;
5521   else
5522     translation_table = CODING_ATTR_DECODE_TBL (attrs),
5523       standard = Vstandard_translation_table_for_decode;
5524   if (NILP (translation_table))
5525     translation_table = standard;
5526   else
5527     {
5528       if (SYMBOLP (translation_table))
5529         translation_table = Fget (translation_table, Qtranslation_table);
5530       else if (CONSP (translation_table))
5531         {
5532           translation_table = Fcopy_sequence (translation_table);
5533           for (val = translation_table; CONSP (val); val = XCDR (val))
5534             if (SYMBOLP (XCAR (val)))
5535               XSETCAR (val, Fget (XCAR (val), Qtranslation_table));
5536         }
5537       if (CHAR_TABLE_P (standard))
5538         {
5539           if (CONSP (translation_table))
5540             translation_table = nconc2 (translation_table,
5541                                         Fcons (standard, Qnil));
5542           else
5543             translation_table = Fcons (translation_table,
5544                                        Fcons (standard, Qnil));
5545         }
5546     }
5547   *max_lookup = 1;
5548   if (CHAR_TABLE_P (translation_table)
5549       && CHAR_TABLE_EXTRA_SLOTS (XCHAR_TABLE (translation_table)) > 1)
5550     {
5551       val = XCHAR_TABLE (translation_table)->extras[1];
5552       if (NATNUMP (val) && *max_lookup < XFASTINT (val))
5553         *max_lookup = XFASTINT (val);
5554     }
5555   else if (CONSP (translation_table))
5556     {
5557       Lisp_Object tail, val;
5558
5559       for (tail = translation_table; CONSP (tail); tail = XCDR (tail))
5560         if (CHAR_TABLE_P (XCAR (tail))
5561             && CHAR_TABLE_EXTRA_SLOTS (XCHAR_TABLE (XCAR (tail))) > 1)
5562           {
5563             val = XCHAR_TABLE (XCAR (tail))->extras[1];
5564             if (NATNUMP (val) && *max_lookup < XFASTINT (val))
5565               *max_lookup = XFASTINT (val);
5566           }
5567     }
5568   return translation_table;
5569 }
5570
5571 #define LOOKUP_TRANSLATION_TABLE(table, c, trans)               \
5572   do {                                                          \
5573     trans = Qnil;                                               \
5574     if (CHAR_TABLE_P (table))                                   \
5575       {                                                         \
5576         trans = CHAR_TABLE_REF (table, c);                      \
5577         if (CHARACTERP (trans))                                 \
5578           c = XFASTINT (trans), trans = Qnil;                   \
5579       }                                                         \
5580     else if (CONSP (table))                                     \
5581       {                                                         \
5582         Lisp_Object tail;                                       \
5583                                                                 \
5584         for (tail = table; CONSP (tail); tail = XCDR (tail))    \
5585           if (CHAR_TABLE_P (XCAR (tail)))                       \
5586             {                                                   \
5587               trans = CHAR_TABLE_REF (XCAR (tail), c);          \
5588               if (CHARACTERP (trans))                           \
5589                 c = XFASTINT (trans), trans = Qnil;             \
5590               else if (! NILP (trans))                          \
5591                 break;                                          \
5592             }                                                   \
5593       }                                                         \
5594   } while (0)
5595
5596
5597 static Lisp_Object
5598 get_translation (val, buf, buf_end, last_block, from_nchars, to_nchars)
5599      Lisp_Object val;
5600      int *buf, *buf_end;
5601      int last_block;
5602      int *from_nchars, *to_nchars;
5603 {
5604   /* VAL is TO or (([FROM-CHAR ...] .  TO) ...) where TO is TO-CHAR or
5605      [TO-CHAR ...].  */
5606   if (CONSP (val))
5607     {
5608       Lisp_Object from, tail;
5609       int i, len;
5610
5611       for (tail = val; CONSP (tail); tail = XCDR (tail))
5612         {
5613           val = XCAR (tail);
5614           from = XCAR (val);
5615           len = ASIZE (from);
5616           for (i = 0; i < len; i++)
5617             {
5618               if (buf + i == buf_end)
5619                 {
5620                   if (! last_block)
5621                     return Qt;
5622                   break;
5623                 }
5624               if (XINT (AREF (from, i)) != buf[i])
5625                 break;
5626             }
5627           if (i == len)
5628             {
5629               val = XCDR (val);
5630               *from_nchars = len;
5631               break;
5632             }
5633         }
5634       if (! CONSP (tail))
5635         return Qnil;
5636     }
5637   if (VECTORP (val))
5638     *buf = XINT (AREF (val, 0)), *to_nchars = ASIZE (val);
5639   else
5640     *buf = XINT (val);
5641   return val;
5642 }
5643
5644
5645 static int
5646 produce_chars (coding, translation_table, last_block)
5647      struct coding_system *coding;
5648      Lisp_Object translation_table;
5649      int last_block;
5650 {
5651   unsigned char *dst = coding->destination + coding->produced;
5652   unsigned char *dst_end = coding->destination + coding->dst_bytes;
5653   int produced;
5654   int produced_chars = 0;
5655   int carryover = 0;
5656
5657   if (! coding->chars_at_source)
5658     {
5659       /* Characters are in coding->charbuf.  */
5660       int *buf = coding->charbuf;
5661       int *buf_end = buf + coding->charbuf_used;
5662
5663       if (BUFFERP (coding->src_object)
5664           && EQ (coding->src_object, coding->dst_object))
5665         dst_end = ((unsigned char *) coding->source) + coding->consumed;
5666
5667       while (buf < buf_end)
5668         {
5669           int c = *buf, i;
5670
5671           if (c >= 0)
5672             {
5673               int from_nchars = 1, to_nchars = 1;
5674               Lisp_Object trans = Qnil;
5675
5676               LOOKUP_TRANSLATION_TABLE (translation_table, c, trans);
5677               if (! NILP (trans))
5678                 {
5679                   trans = get_translation (trans, buf, buf_end, last_block,
5680                                            &from_nchars, &to_nchars);
5681                   if (EQ (trans, Qt))
5682                     break;
5683                   c = *buf;
5684                 }
5685
5686               if (dst + MAX_MULTIBYTE_LENGTH * to_nchars > dst_end)
5687                 {
5688                   dst = alloc_destination (coding,
5689                                            buf_end - buf
5690                                            + MAX_MULTIBYTE_LENGTH * to_nchars,
5691                                            dst);
5692                   dst_end = coding->destination + coding->dst_bytes;
5693                 }
5694
5695               for (i = 0; i < to_nchars; i++)
5696                 {
5697                   if (i > 0)
5698                     c = XINT (AREF (trans, i));
5699                   if (coding->dst_multibyte
5700                       || ! CHAR_BYTE8_P (c))
5701                     CHAR_STRING_ADVANCE (c, dst);
5702                   else
5703                     *dst++ = CHAR_TO_BYTE8 (c);
5704                 }
5705               produced_chars += to_nchars;
5706               *buf++ = to_nchars;
5707               while (--from_nchars > 0)
5708                 *buf++ = 0;
5709             }
5710           else
5711             /* This is an annotation datum.  (-C) is the length.  */
5712             buf += -c;
5713         }
5714       carryover = buf_end - buf;
5715     }
5716   else
5717     {
5718       const unsigned char *src = coding->source;
5719       const unsigned char *src_end = src + coding->src_bytes;
5720       Lisp_Object eol_type;
5721
5722       eol_type = CODING_ID_EOL_TYPE (coding->id);
5723
5724       if (coding->src_multibyte != coding->dst_multibyte)
5725         {
5726           if (coding->src_multibyte)
5727             {
5728               int multibytep = 1;
5729               int consumed_chars;
5730
5731               while (1)
5732                 {
5733                   const unsigned char *src_base = src;
5734                   int c;
5735
5736                   ONE_MORE_BYTE (c);
5737                   if (c == '\r')
5738                     {
5739                       if (EQ (eol_type, Qdos))
5740                         {
5741                           if (src == src_end)
5742                             {
5743                               record_conversion_result
5744                                 (coding, CODING_RESULT_INSUFFICIENT_SRC);
5745                               goto no_more_source;
5746                             }
5747                           if (*src == '\n')
5748                             c = *src++;
5749                         }
5750                       else if (EQ (eol_type, Qmac))
5751                         c = '\n';
5752                     }
5753                   if (dst == dst_end)
5754                     {
5755                       coding->consumed = src - coding->source;
5756
5757                     if (EQ (coding->src_object, coding->dst_object))
5758                       dst_end = (unsigned char *) src;
5759                     if (dst == dst_end)
5760                       {
5761                         dst = alloc_destination (coding, src_end - src + 1,
5762                                                  dst);
5763                         dst_end = coding->destination + coding->dst_bytes;
5764                         coding_set_source (coding);
5765                         src = coding->source + coding->consumed;
5766                         src_end = coding->source + coding->src_bytes;
5767                       }
5768                     }
5769                   *dst++ = c;
5770                   produced_chars++;
5771                 }
5772             no_more_source:
5773               ;
5774             }
5775           else
5776             while (src < src_end)
5777               {
5778                 int multibytep = 1;
5779                 int c = *src++;
5780
5781                 if (c == '\r')
5782                   {
5783                     if (EQ (eol_type, Qdos))
5784                       {
5785                         if (src < src_end
5786                             && *src == '\n')
5787                           c = *src++;
5788                       }
5789                     else if (EQ (eol_type, Qmac))
5790                       c = '\n';
5791                   }
5792                 if (dst >= dst_end - 1)
5793                   {
5794                     coding->consumed = src - coding->source;
5795
5796                     if (EQ (coding->src_object, coding->dst_object))
5797                       dst_end = (unsigned char *) src;
5798                     if (dst >= dst_end - 1)
5799                       {
5800                         dst = alloc_destination (coding, src_end - src + 2,
5801                                                  dst);
5802                         dst_end = coding->destination + coding->dst_bytes;
5803                         coding_set_source (coding);
5804                         src = coding->source + coding->consumed;
5805                         src_end = coding->source + coding->src_bytes;
5806                       }
5807                   }
5808                 EMIT_ONE_BYTE (c);
5809               }
5810         }
5811       else
5812         {
5813           if (!EQ (coding->src_object, coding->dst_object))
5814             {
5815               int require = coding->src_bytes - coding->dst_bytes;
5816
5817               if (require > 0)
5818                 {
5819                   EMACS_INT offset = src - coding->source;
5820
5821                   dst = alloc_destination (coding, require, dst);
5822                   coding_set_source (coding);
5823                   src = coding->source + offset;
5824                   src_end = coding->source + coding->src_bytes;
5825                 }
5826             }
5827           produced_chars = coding->src_chars;
5828           while (src < src_end)
5829             {
5830               int c = *src++;
5831
5832               if (c == '\r')
5833                 {
5834                   if (EQ (eol_type, Qdos))
5835                     {
5836                       if (src < src_end
5837                           && *src == '\n')
5838                         c = *src++;
5839                       produced_chars--;
5840                     }
5841                   else if (EQ (eol_type, Qmac))
5842                     c = '\n';
5843                 }
5844               *dst++ = c;
5845             }
5846         }
5847       coding->consumed = coding->src_bytes;
5848       coding->consumed_char = coding->src_chars;
5849     }
5850
5851   produced = dst - (coding->destination + coding->produced);
5852   if (BUFFERP (coding->dst_object))
5853     insert_from_gap (produced_chars, produced);
5854   coding->produced += produced;
5855   coding->produced_char += produced_chars;
5856   return carryover;
5857 }
5858
5859 /* Compose text in CODING->object according to the annotation data at
5860    CHARBUF.  CHARBUF is an array:
5861      [ -LENGTH ANNOTATION_MASK FROM TO METHOD COMP_LEN [ COMPONENTS... ] ]
5862  */
5863
5864 static INLINE void
5865 produce_composition (coding, charbuf, pos)
5866      struct coding_system *coding;
5867      int *charbuf;
5868      EMACS_INT pos;
5869 {
5870   int len;
5871   EMACS_INT to;
5872   enum composition_method method;
5873   Lisp_Object components;
5874
5875   len = -charbuf[0];
5876   to = pos + charbuf[2];
5877   method = (enum composition_method) (charbuf[3]);
5878
5879   if (method == COMPOSITION_RELATIVE)
5880     components = Qnil;
5881   else
5882     {
5883       Lisp_Object args[MAX_COMPOSITION_COMPONENTS * 2 - 1];
5884       int i;
5885
5886       len -= 4;
5887       charbuf += 4;
5888       for (i = 0; i < len; i++)
5889         args[i] = make_number (charbuf[i]);
5890       components = (method == COMPOSITION_WITH_ALTCHARS
5891                     ? Fstring (len, args) : Fvector (len, args));
5892     }
5893   compose_text (pos, to, components, Qnil, coding->dst_object);
5894 }
5895
5896
5897 /* Put `charset' property on text in CODING->object according to
5898    the annotation data at CHARBUF.  CHARBUF is an array:
5899      [ -LENGTH ANNOTATION_MASK NCHARS CHARSET-ID ]
5900  */
5901
5902 static INLINE void
5903 produce_charset (coding, charbuf, pos)
5904      struct coding_system *coding;
5905      int *charbuf;
5906      EMACS_INT pos;
5907 {
5908   EMACS_INT from = pos - charbuf[2];
5909   struct charset *charset = CHARSET_FROM_ID (charbuf[3]);
5910
5911   Fput_text_property (make_number (from), make_number (pos),
5912                       Qcharset, CHARSET_NAME (charset),
5913                       coding->dst_object);
5914 }
5915
5916
5917 #define CHARBUF_SIZE 0x4000
5918
5919 #define ALLOC_CONVERSION_WORK_AREA(coding)                              \
5920   do {                                                                  \
5921     int size = CHARBUF_SIZE;;                                           \
5922                                                                         \
5923     coding->charbuf = NULL;                                             \
5924     while (size > 1024)                                                 \
5925       {                                                                 \
5926         coding->charbuf = (int *) alloca (sizeof (int) * size);         \
5927         if (coding->charbuf)                                            \
5928           break;                                                        \
5929         size >>= 1;                                                     \
5930       }                                                                 \
5931     if (! coding->charbuf)                                              \
5932       {                                                                 \
5933         record_conversion_result (coding, CODING_RESULT_INSUFFICIENT_MEM); \
5934         return coding->result;                                          \
5935       }                                                                 \
5936     coding->charbuf_size = size;                                        \
5937   } while (0)
5938
5939
5940 static void
5941 produce_annotation (coding, pos)
5942      struct coding_system *coding;
5943      EMACS_INT pos;
5944 {
5945   int *charbuf = coding->charbuf;
5946   int *charbuf_end = charbuf + coding->charbuf_used;
5947
5948   if (NILP (coding->dst_object))
5949     return;
5950
5951   while (charbuf < charbuf_end)
5952     {
5953       if (*charbuf >= 0)
5954         pos += *charbuf++;
5955       else
5956         {
5957           int len = -*charbuf;
5958           switch (charbuf[1])
5959             {
5960             case CODING_ANNOTATE_COMPOSITION_MASK:
5961               produce_composition (coding, charbuf, pos);
5962               break;
5963             case CODING_ANNOTATE_CHARSET_MASK:
5964               produce_charset (coding, charbuf, pos);
5965               break;
5966             default:
5967               abort ();
5968             }
5969           charbuf += len;
5970         }
5971     }
5972 }
5973
5974 /* Decode the data at CODING->src_object into CODING->dst_object.
5975    CODING->src_object is a buffer, a string, or nil.
5976    CODING->dst_object is a buffer.
5977
5978    If CODING->src_object is a buffer, it must be the current buffer.
5979    In this case, if CODING->src_pos is positive, it is a position of
5980    the source text in the buffer, otherwise, the source text is in the
5981    gap area of the buffer, and CODING->src_pos specifies the offset of
5982    the text from GPT (which must be the same as PT).  If this is the
5983    same buffer as CODING->dst_object, CODING->src_pos must be
5984    negative.
5985
5986    If CODING->src_object is a string, CODING->src_pos in an index to
5987    that string.
5988
5989    If CODING->src_object is nil, CODING->source must already point to
5990    the non-relocatable memory area.  In this case, CODING->src_pos is
5991    an offset from CODING->source.
5992
5993    The decoded data is inserted at the current point of the buffer
5994    CODING->dst_object.
5995 */
5996
5997 static int
5998 decode_coding (coding)
5999      struct coding_system *coding;
6000 {
6001   Lisp_Object attrs;
6002   Lisp_Object undo_list;
6003   Lisp_Object translation_table;
6004   int carryover;
6005   int max_lookup;
6006   int i;
6007
6008   if (BUFFERP (coding->src_object)
6009       && coding->src_pos > 0
6010       && coding->src_pos < GPT
6011       && coding->src_pos + coding->src_chars > GPT)
6012     move_gap_both (coding->src_pos, coding->src_pos_byte);
6013
6014   undo_list = Qt;
6015   if (BUFFERP (coding->dst_object))
6016     {
6017       if (current_buffer != XBUFFER (coding->dst_object))
6018         set_buffer_internal (XBUFFER (coding->dst_object));
6019       if (GPT != PT)
6020         move_gap_both (PT, PT_BYTE);
6021       undo_list = current_buffer->undo_list;
6022       current_buffer->undo_list = Qt;
6023     }
6024
6025   coding->consumed = coding->consumed_char = 0;
6026   coding->produced = coding->produced_char = 0;
6027   coding->chars_at_source = 0;
6028   record_conversion_result (coding, CODING_RESULT_SUCCESS);
6029   coding->errors = 0;
6030
6031   ALLOC_CONVERSION_WORK_AREA (coding);
6032
6033   attrs = CODING_ID_ATTRS (coding->id);
6034   translation_table = get_translation_table (attrs, 0, &max_lookup);
6035
6036   carryover = 0;
6037   do
6038     {
6039       EMACS_INT pos = coding->dst_pos + coding->produced_char;
6040
6041       coding_set_source (coding);
6042       coding->annotated = 0;
6043       coding->charbuf_used = carryover;
6044       (*(coding->decoder)) (coding);
6045       coding_set_destination (coding);
6046       carryover = produce_chars (coding, translation_table, 0);
6047       if (coding->annotated)
6048         produce_annotation (coding, pos);
6049       for (i = 0; i < carryover; i++)
6050         coding->charbuf[i]
6051           = coding->charbuf[coding->charbuf_used - carryover + i];
6052     }
6053   while (coding->consumed < coding->src_bytes
6054          && ! coding->result);
6055
6056   if (carryover > 0)
6057     {
6058       coding_set_destination (coding);
6059       coding->charbuf_used = carryover;
6060       produce_chars (coding, translation_table, 1);
6061     }
6062
6063   coding->carryover_bytes = 0;
6064   if (coding->consumed < coding->src_bytes)
6065     {
6066       int nbytes = coding->src_bytes - coding->consumed;
6067       const unsigned char *src;
6068
6069       coding_set_source (coding);
6070       coding_set_destination (coding);
6071       src = coding->source + coding->consumed;
6072
6073       if (coding->mode & CODING_MODE_LAST_BLOCK)
6074         {
6075           /* Flush out unprocessed data as binary chars.  We are sure
6076              that the number of data is less than the size of
6077              coding->charbuf.  */
6078           coding->charbuf_used = 0;
6079           while (nbytes-- > 0)
6080             {
6081               int c = *src++;
6082
6083               coding->charbuf[coding->charbuf_used++] = (c & 0x80 ? - c : c);
6084             }
6085           produce_chars (coding);
6086         }
6087       else
6088         {
6089           /* Record unprocessed bytes in coding->carryover.  We are
6090              sure that the number of data is less than the size of
6091              coding->carryover.  */
6092           unsigned char *p = coding->carryover;
6093
6094           coding->carryover_bytes = nbytes;
6095           while (nbytes-- > 0)
6096             *p++ = *src++;
6097         }
6098       coding->consumed = coding->src_bytes;
6099     }
6100
6101   if (BUFFERP (coding->dst_object))
6102     {
6103       current_buffer->undo_list = undo_list;
6104       record_insert (coding->dst_pos, coding->produced_char);
6105     }
6106   if (! EQ (CODING_ID_EOL_TYPE (coding->id), Qunix))
6107     decode_eol (coding);
6108   return coding->result;
6109 }
6110
6111
6112 /* Extract an annotation datum from a composition starting at POS and
6113    ending before LIMIT of CODING->src_object (buffer or string), store
6114    the data in BUF, set *STOP to a starting position of the next
6115    composition (if any) or to LIMIT, and return the address of the
6116    next element of BUF.
6117
6118    If such an annotation is not found, set *STOP to a starting
6119    position of a composition after POS (if any) or to LIMIT, and
6120    return BUF.  */
6121
6122 static INLINE int *
6123 handle_composition_annotation (pos, limit, coding, buf, stop)
6124      EMACS_INT pos, limit;
6125      struct coding_system *coding;
6126      int *buf;
6127      EMACS_INT *stop;
6128 {
6129   EMACS_INT start, end;
6130   Lisp_Object prop;
6131
6132   if (! find_composition (pos, limit, &start, &end, &prop, coding->src_object)
6133       || end > limit)
6134     *stop = limit;
6135   else if (start > pos)
6136     *stop = start;
6137   else
6138     {
6139       if (start == pos)
6140         {
6141           /* We found a composition.  Store the corresponding
6142              annotation data in BUF.  */
6143           int *head = buf;
6144           enum composition_method method = COMPOSITION_METHOD (prop);
6145           int nchars = COMPOSITION_LENGTH (prop);
6146
6147           ADD_COMPOSITION_DATA (buf, nchars, method);
6148           if (method != COMPOSITION_RELATIVE)
6149             {
6150               Lisp_Object components;
6151               int len, i, i_byte;
6152
6153               components = COMPOSITION_COMPONENTS (prop);
6154               if (VECTORP (components))
6155                 {
6156                   len = XVECTOR (components)->size;
6157                   for (i = 0; i < len; i++)
6158                     *buf++ = XINT (AREF (components, i));
6159                 }
6160               else if (STRINGP (components))
6161                 {
6162                   len = SCHARS (components);
6163                   i = i_byte = 0;
6164                   while (i < len)
6165                     {
6166                       FETCH_STRING_CHAR_ADVANCE (*buf, components, i, i_byte);
6167                       buf++;
6168                     }
6169                 }
6170               else if (INTEGERP (components))
6171                 {
6172                   len = 1;
6173                   *buf++ = XINT (components);
6174                 }
6175               else if (CONSP (components))
6176                 {
6177                   for (len = 0; CONSP (components);
6178                        len++, components = XCDR (components))
6179                     *buf++ = XINT (XCAR (components));
6180                 }
6181               else
6182                 abort ();
6183               *head -= len;
6184             }
6185         }
6186
6187       if (find_composition (end, limit, &start, &end, &prop,
6188                             coding->src_object)
6189           && end <= limit)
6190         *stop = start;
6191       else
6192         *stop = limit;
6193     }
6194   return buf;
6195 }
6196
6197
6198 /* Extract an annotation datum from a text property `charset' at POS of
6199    CODING->src_object (buffer of string), store the data in BUF, set
6200    *STOP to the position where the value of `charset' property changes
6201    (limiting by LIMIT), and return the address of the next element of
6202    BUF.
6203
6204    If the property value is nil, set *STOP to the position where the
6205    property value is non-nil (limiting by LIMIT), and return BUF.  */
6206
6207 static INLINE int *
6208 handle_charset_annotation (pos, limit, coding, buf, stop)
6209      EMACS_INT pos, limit;
6210      struct coding_system *coding;
6211      int *buf;
6212      EMACS_INT *stop;
6213 {
6214   Lisp_Object val, next;
6215   int id;
6216
6217   val = Fget_text_property (make_number (pos), Qcharset, coding->src_object);
6218   if (! NILP (val) && CHARSETP (val))
6219     id = XINT (CHARSET_SYMBOL_ID (val));
6220   else
6221     id = -1;
6222   ADD_CHARSET_DATA (buf, 0, id);
6223   next = Fnext_single_property_change (make_number (pos), Qcharset,
6224                                        coding->src_object,
6225                                        make_number (limit));
6226   *stop = XINT (next);
6227   return buf;
6228 }
6229
6230
6231 static void
6232 consume_chars (coding, translation_table, max_lookup)
6233      struct coding_system *coding;
6234      Lisp_Object translation_table;
6235      int max_lookup;
6236 {
6237   int *buf = coding->charbuf;
6238   int *buf_end = coding->charbuf + coding->charbuf_size;
6239   const unsigned char *src = coding->source + coding->consumed;
6240   const unsigned char *src_end = coding->source + coding->src_bytes;
6241   EMACS_INT pos = coding->src_pos + coding->consumed_char;
6242   EMACS_INT end_pos = coding->src_pos + coding->src_chars;
6243   int multibytep = coding->src_multibyte;
6244   Lisp_Object eol_type;
6245   int c;
6246   EMACS_INT stop, stop_composition, stop_charset;
6247   int *lookup_buf = NULL;
6248
6249   if (! NILP (translation_table))
6250     lookup_buf = alloca (sizeof (int) * max_lookup);
6251
6252   eol_type = CODING_ID_EOL_TYPE (coding->id);
6253   if (VECTORP (eol_type))
6254     eol_type = Qunix;
6255
6256   /* Note: composition handling is not yet implemented.  */
6257   coding->common_flags &= ~CODING_ANNOTATE_COMPOSITION_MASK;
6258
6259   if (NILP (coding->src_object))
6260     stop = stop_composition = stop_charset = end_pos;
6261   else
6262     {
6263       if (coding->common_flags & CODING_ANNOTATE_COMPOSITION_MASK)
6264         stop = stop_composition = pos;
6265       else
6266         stop = stop_composition = end_pos;
6267       if (coding->common_flags & CODING_ANNOTATE_CHARSET_MASK)
6268         stop = stop_charset = pos;
6269       else
6270         stop_charset = end_pos;
6271     }
6272
6273   /* Compensate for CRLF and conversion.  */
6274   buf_end -= 1 + MAX_ANNOTATION_LENGTH;
6275   while (buf < buf_end)
6276     {
6277       Lisp_Object trans;
6278
6279       if (pos == stop)
6280         {
6281           if (pos == end_pos)
6282             break;
6283           if (pos == stop_composition)
6284             buf = handle_composition_annotation (pos, end_pos, coding,
6285                                                  buf, &stop_composition);
6286           if (pos == stop_charset)
6287             buf = handle_charset_annotation (pos, end_pos, coding,
6288                                              buf, &stop_charset);
6289           stop = (stop_composition < stop_charset
6290                   ? stop_composition : stop_charset);
6291         }
6292
6293       if (! multibytep)
6294         {
6295           EMACS_INT bytes;
6296
6297           if (! CODING_FOR_UNIBYTE (coding)
6298               && (bytes = MULTIBYTE_LENGTH (src, src_end)) > 0)
6299             c = STRING_CHAR_ADVANCE (src), pos += bytes;
6300           else
6301             c = *src++, pos++;
6302         }
6303       else
6304         c = STRING_CHAR_ADVANCE (src), pos++;
6305       if ((c == '\r') && (coding->mode & CODING_MODE_SELECTIVE_DISPLAY))
6306         c = '\n';
6307       if (! EQ (eol_type, Qunix))
6308         {
6309           if (c == '\n')
6310             {
6311               if (EQ (eol_type, Qdos))
6312                 *buf++ = '\r';
6313               else
6314                 c = '\r';
6315             }
6316         }
6317
6318       trans = Qnil;
6319       LOOKUP_TRANSLATION_TABLE (translation_table, c, trans);
6320       if (NILP (trans))
6321         *buf++ = c;
6322       else
6323         {
6324           int from_nchars = 1, to_nchars = 1;
6325           int *lookup_buf_end;
6326           const unsigned char *p = src;
6327           int i;
6328
6329           lookup_buf[0] = c;
6330           for (i = 1; i < max_lookup && p < src_end; i++)
6331             lookup_buf[i] = STRING_CHAR_ADVANCE (p);
6332           lookup_buf_end = lookup_buf + i;
6333           trans = get_translation (trans, lookup_buf, lookup_buf_end, 1,
6334                                    &from_nchars, &to_nchars);
6335           if (EQ (trans, Qt)
6336               || buf + to_nchars > buf_end)
6337             break;
6338           *buf++ = *lookup_buf;
6339           for (i = 1; i < to_nchars; i++)
6340             *buf++ = XINT (AREF (trans, i));
6341           for (i = 1; i < from_nchars; i++, pos++)
6342             src += MULTIBYTE_LENGTH_NO_CHECK (src);
6343         }
6344     }
6345
6346   coding->consumed = src - coding->source;
6347   coding->consumed_char = pos - coding->src_pos;
6348   coding->charbuf_used = buf - coding->charbuf;
6349   coding->chars_at_source = 0;
6350 }
6351
6352
6353 /* Encode the text at CODING->src_object into CODING->dst_object.
6354    CODING->src_object is a buffer or a string.
6355    CODING->dst_object is a buffer or nil.
6356
6357    If CODING->src_object is a buffer, it must be the current buffer.
6358    In this case, if CODING->src_pos is positive, it is a position of
6359    the source text in the buffer, otherwise. the source text is in the
6360    gap area of the buffer, and coding->src_pos specifies the offset of
6361    the text from GPT (which must be the same as PT).  If this is the
6362    same buffer as CODING->dst_object, CODING->src_pos must be
6363    negative and CODING should not have `pre-write-conversion'.
6364
6365    If CODING->src_object is a string, CODING should not have
6366    `pre-write-conversion'.
6367
6368    If CODING->dst_object is a buffer, the encoded data is inserted at
6369    the current point of that buffer.
6370
6371    If CODING->dst_object is nil, the encoded data is placed at the
6372    memory area specified by CODING->destination.  */
6373
6374 static int
6375 encode_coding (coding)
6376      struct coding_system *coding;
6377 {
6378   Lisp_Object attrs;
6379   Lisp_Object translation_table;
6380   int max_lookup;
6381
6382   attrs = CODING_ID_ATTRS (coding->id);
6383   translation_table = get_translation_table (attrs, 1, &max_lookup);
6384
6385   if (BUFFERP (coding->dst_object))
6386     {
6387       set_buffer_internal (XBUFFER (coding->dst_object));
6388       coding->dst_multibyte
6389         = ! NILP (current_buffer->enable_multibyte_characters);
6390     }
6391
6392   coding->consumed = coding->consumed_char = 0;
6393   coding->produced = coding->produced_char = 0;
6394   record_conversion_result (coding, CODING_RESULT_SUCCESS);
6395   coding->errors = 0;
6396
6397   ALLOC_CONVERSION_WORK_AREA (coding);
6398
6399   do {
6400     coding_set_source (coding);
6401     consume_chars (coding, translation_table, max_lookup);
6402     coding_set_destination (coding);
6403     (*(coding->encoder)) (coding);
6404   } while (coding->consumed_char < coding->src_chars);
6405
6406   if (BUFFERP (coding->dst_object))
6407     insert_from_gap (coding->produced_char, coding->produced);
6408
6409   return (coding->result);
6410 }
6411
6412
6413 /* Name (or base name) of work buffer for code conversion.  */
6414 static Lisp_Object Vcode_conversion_workbuf_name;
6415
6416 /* A working buffer used by the top level conversion.  Once it is
6417    created, it is never destroyed.  It has the name
6418    Vcode_conversion_workbuf_name.  The other working buffers are
6419    destroyed after the use is finished, and their names are modified
6420    versions of Vcode_conversion_workbuf_name.  */
6421 static Lisp_Object Vcode_conversion_reused_workbuf;
6422
6423 /* 1 iff Vcode_conversion_reused_workbuf is already in use.  */
6424 static int reused_workbuf_in_use;
6425
6426
6427 /* Return a working buffer of code convesion.  MULTIBYTE specifies the
6428    multibyteness of returning buffer.  */
6429
6430 Lisp_Object
6431 make_conversion_work_buffer (multibyte)
6432 {
6433   Lisp_Object name, workbuf;
6434   struct buffer *current;
6435
6436   if (reused_workbuf_in_use++)
6437     {
6438       name = Fgenerate_new_buffer_name (Vcode_conversion_workbuf_name, Qnil);
6439       workbuf = Fget_buffer_create (name);
6440     }
6441   else
6442     {
6443       name = Vcode_conversion_workbuf_name;
6444       workbuf = Fget_buffer_create (name);
6445       if (NILP (Vcode_conversion_reused_workbuf))
6446         Vcode_conversion_reused_workbuf = workbuf;
6447     }
6448   current = current_buffer;
6449   set_buffer_internal (XBUFFER (workbuf));
6450   Ferase_buffer ();
6451   current_buffer->undo_list = Qt;
6452   current_buffer->enable_multibyte_characters = multibyte ? Qt : Qnil;
6453   set_buffer_internal (current);
6454   return workbuf;
6455 }
6456
6457
6458 static Lisp_Object
6459 code_conversion_restore (arg)
6460      Lisp_Object arg;
6461 {
6462   Lisp_Object current, workbuf;
6463
6464   current = XCAR (arg);
6465   workbuf = XCDR (arg);
6466   if (! NILP (workbuf))
6467     {
6468       if (EQ (workbuf, Vcode_conversion_reused_workbuf))
6469         reused_workbuf_in_use = 0;
6470       else if (! NILP (Fbuffer_live_p (workbuf)))
6471         Fkill_buffer (workbuf);
6472     }
6473   set_buffer_internal (XBUFFER (current));
6474   return Qnil;
6475 }
6476
6477 Lisp_Object
6478 code_conversion_save (with_work_buf, multibyte)
6479      int with_work_buf, multibyte;
6480 {
6481   Lisp_Object workbuf = Qnil;
6482
6483   if (with_work_buf)
6484     workbuf = make_conversion_work_buffer (multibyte);
6485   record_unwind_protect (code_conversion_restore,
6486                          Fcons (Fcurrent_buffer (), workbuf));
6487   return workbuf;
6488 }
6489
6490 int
6491 decode_coding_gap (coding, chars, bytes)
6492      struct coding_system *coding;
6493      EMACS_INT chars, bytes;
6494 {
6495   int count = specpdl_ptr - specpdl;
6496   Lisp_Object attrs;
6497
6498   code_conversion_save (0, 0);
6499
6500   coding->src_object = Fcurrent_buffer ();
6501   coding->src_chars = chars;
6502   coding->src_bytes = bytes;
6503   coding->src_pos = -chars;
6504   coding->src_pos_byte = -bytes;
6505   coding->src_multibyte = chars < bytes;
6506   coding->dst_object = coding->src_object;
6507   coding->dst_pos = PT;
6508   coding->dst_pos_byte = PT_BYTE;
6509   coding->dst_multibyte = ! NILP (current_buffer->enable_multibyte_characters);
6510   coding->mode |= CODING_MODE_LAST_BLOCK;
6511
6512   if (CODING_REQUIRE_DETECTION (coding))
6513     detect_coding (coding);
6514
6515   decode_coding (coding);
6516
6517   attrs = CODING_ID_ATTRS (coding->id);
6518   if (! NILP (CODING_ATTR_POST_READ (attrs)))
6519     {
6520       EMACS_INT prev_Z = Z, prev_Z_BYTE = Z_BYTE;
6521       Lisp_Object val;
6522
6523       TEMP_SET_PT_BOTH (coding->dst_pos, coding->dst_pos_byte);
6524       val = call1 (CODING_ATTR_POST_READ (attrs),
6525                    make_number (coding->produced_char));
6526       CHECK_NATNUM (val);
6527       coding->produced_char += Z - prev_Z;
6528       coding->produced += Z_BYTE - prev_Z_BYTE;
6529     }
6530
6531   unbind_to (count, Qnil);
6532   return coding->result;
6533 }
6534
6535 int
6536 encode_coding_gap (coding, chars, bytes)
6537      struct coding_system *coding;
6538      EMACS_INT chars, bytes;
6539 {
6540   int count = specpdl_ptr - specpdl;
6541
6542   code_conversion_save (0, 0);
6543
6544   coding->src_object = Fcurrent_buffer ();
6545   coding->src_chars = chars;
6546   coding->src_bytes = bytes;
6547   coding->src_pos = -chars;
6548   coding->src_pos_byte = -bytes;
6549   coding->src_multibyte = chars < bytes;
6550   coding->dst_object = coding->src_object;
6551   coding->dst_pos = PT;
6552   coding->dst_pos_byte = PT_BYTE;
6553
6554   encode_coding (coding);
6555
6556   unbind_to (count, Qnil);
6557   return coding->result;
6558 }
6559
6560
6561 /* Decode the text in the range FROM/FROM_BYTE and TO/TO_BYTE in
6562    SRC_OBJECT into DST_OBJECT by coding context CODING.
6563
6564    SRC_OBJECT is a buffer, a string, or Qnil.
6565
6566    If it is a buffer, the text is at point of the buffer.  FROM and TO
6567    are positions in the buffer.
6568
6569    If it is a string, the text is at the beginning of the string.
6570    FROM and TO are indices to the string.
6571
6572    If it is nil, the text is at coding->source.  FROM and TO are
6573    indices to coding->source.
6574
6575    DST_OBJECT is a buffer, Qt, or Qnil.
6576
6577    If it is a buffer, the decoded text is inserted at point of the
6578    buffer.  If the buffer is the same as SRC_OBJECT, the source text
6579    is deleted.
6580
6581    If it is Qt, a string is made from the decoded text, and
6582    set in CODING->dst_object.
6583
6584    If it is Qnil, the decoded text is stored at CODING->destination.
6585    The caller must allocate CODING->dst_bytes bytes at
6586    CODING->destination by xmalloc.  If the decoded text is longer than
6587    CODING->dst_bytes, CODING->destination is relocated by xrealloc.
6588  */
6589
6590 void
6591 decode_coding_object (coding, src_object, from, from_byte, to, to_byte,
6592                       dst_object)
6593      struct coding_system *coding;
6594      Lisp_Object src_object;
6595      EMACS_INT from, from_byte, to, to_byte;
6596      Lisp_Object dst_object;
6597 {
6598   int count = specpdl_ptr - specpdl;
6599   unsigned char *destination;
6600   EMACS_INT dst_bytes;
6601   EMACS_INT chars = to - from;
6602   EMACS_INT bytes = to_byte - from_byte;
6603   Lisp_Object attrs;
6604   Lisp_Object buffer;
6605   int saved_pt = -1, saved_pt_byte;
6606
6607   buffer = Fcurrent_buffer ();
6608
6609   if (NILP (dst_object))
6610     {
6611       destination = coding->destination;
6612       dst_bytes = coding->dst_bytes;
6613     }
6614
6615   coding->src_object = src_object;
6616   coding->src_chars = chars;
6617   coding->src_bytes = bytes;
6618   coding->src_multibyte = chars < bytes;
6619
6620   if (STRINGP (src_object))
6621     {
6622       coding->src_pos = from;
6623       coding->src_pos_byte = from_byte;
6624     }
6625   else if (BUFFERP (src_object))
6626     {
6627       set_buffer_internal (XBUFFER (src_object));
6628       if (from != GPT)
6629         move_gap_both (from, from_byte);
6630       if (EQ (src_object, dst_object))
6631         {
6632           saved_pt = PT, saved_pt_byte = PT_BYTE;
6633           TEMP_SET_PT_BOTH (from, from_byte);
6634           del_range_both (from, from_byte, to, to_byte, 1);
6635           coding->src_pos = -chars;
6636           coding->src_pos_byte = -bytes;
6637         }
6638       else
6639         {
6640           coding->src_pos = from;
6641           coding->src_pos_byte = from_byte;
6642         }
6643     }
6644
6645   if (CODING_REQUIRE_DETECTION (coding))
6646     detect_coding (coding);
6647   attrs = CODING_ID_ATTRS (coding->id);
6648
6649   if (EQ (dst_object, Qt)
6650       || (! NILP (CODING_ATTR_POST_READ (attrs))
6651           && NILP (dst_object)))
6652     {
6653       coding->dst_object = code_conversion_save (1, 1);
6654       coding->dst_pos = BEG;
6655       coding->dst_pos_byte = BEG_BYTE;
6656       coding->dst_multibyte = 1;
6657     }
6658   else if (BUFFERP (dst_object))
6659     {
6660       code_conversion_save (0, 0);
6661       coding->dst_object = dst_object;
6662       coding->dst_pos = BUF_PT (XBUFFER (dst_object));
6663       coding->dst_pos_byte = BUF_PT_BYTE (XBUFFER (dst_object));
6664       coding->dst_multibyte
6665         = ! NILP (XBUFFER (dst_object)->enable_multibyte_characters);
6666     }
6667   else
6668     {
6669       code_conversion_save (0, 0);
6670       coding->dst_object = Qnil;
6671       coding->dst_multibyte = 1;
6672     }
6673
6674   decode_coding (coding);
6675
6676   if (BUFFERP (coding->dst_object))
6677     set_buffer_internal (XBUFFER (coding->dst_object));
6678
6679   if (! NILP (CODING_ATTR_POST_READ (attrs)))
6680     {
6681       struct gcpro gcpro1, gcpro2;
6682       EMACS_INT prev_Z = Z, prev_Z_BYTE = Z_BYTE;
6683       Lisp_Object val;
6684
6685       TEMP_SET_PT_BOTH (coding->dst_pos, coding->dst_pos_byte);
6686       GCPRO2 (coding->src_object, coding->dst_object);
6687       val = call1 (CODING_ATTR_POST_READ (attrs),
6688                    make_number (coding->produced_char));
6689       UNGCPRO;
6690       CHECK_NATNUM (val);
6691       coding->produced_char += Z - prev_Z;
6692       coding->produced += Z_BYTE - prev_Z_BYTE;
6693     }
6694
6695   if (EQ (dst_object, Qt))
6696     {
6697       coding->dst_object = Fbuffer_string ();
6698     }
6699   else if (NILP (dst_object) && BUFFERP (coding->dst_object))
6700     {
6701       set_buffer_internal (XBUFFER (coding->dst_object));
6702       if (dst_bytes < coding->produced)
6703         {
6704           destination
6705             = (unsigned char *) xrealloc (destination, coding->produced);
6706           if (! destination)
6707             {
6708               record_conversion_result (coding,
6709                                         CODING_RESULT_INSUFFICIENT_DST);
6710               unbind_to (count, Qnil);
6711               return;
6712             }
6713           if (BEGV < GPT && GPT < BEGV + coding->produced_char)
6714             move_gap_both (BEGV, BEGV_BYTE);
6715           bcopy (BEGV_ADDR, destination, coding->produced);
6716           coding->destination = destination;
6717         }
6718     }
6719
6720   if (saved_pt >= 0)
6721     {
6722       /* This is the case of:
6723          (BUFFERP (src_object) && EQ (src_object, dst_object))
6724          As we have moved PT while replacing the original buffer
6725          contents, we must recover it now.  */
6726       set_buffer_internal (XBUFFER (src_object));
6727       if (saved_pt < from)
6728         TEMP_SET_PT_BOTH (saved_pt, saved_pt_byte);
6729       else if (saved_pt < from + chars)
6730         TEMP_SET_PT_BOTH (from, from_byte);
6731       else if (! NILP (current_buffer->enable_multibyte_characters))
6732         TEMP_SET_PT_BOTH (saved_pt + (coding->produced_char - chars),
6733                           saved_pt_byte + (coding->produced - bytes));
6734       else
6735         TEMP_SET_PT_BOTH (saved_pt + (coding->produced - bytes),
6736                           saved_pt_byte + (coding->produced - bytes));
6737     }
6738
6739   unbind_to (count, coding->dst_object);
6740 }
6741
6742
6743 void
6744 encode_coding_object (coding, src_object, from, from_byte, to, to_byte,
6745                       dst_object)
6746      struct coding_system *coding;
6747      Lisp_Object src_object;
6748      EMACS_INT from, from_byte, to, to_byte;
6749      Lisp_Object dst_object;
6750 {
6751   int count = specpdl_ptr - specpdl;
6752   EMACS_INT chars = to - from;
6753   EMACS_INT bytes = to_byte - from_byte;
6754   Lisp_Object attrs;
6755   Lisp_Object buffer;
6756   int saved_pt = -1, saved_pt_byte;
6757
6758   buffer = Fcurrent_buffer ();
6759
6760   coding->src_object = src_object;
6761   coding->src_chars = chars;
6762   coding->src_bytes = bytes;
6763   coding->src_multibyte = chars < bytes;
6764
6765   attrs = CODING_ID_ATTRS (coding->id);
6766
6767   if (! NILP (CODING_ATTR_PRE_WRITE (attrs)))
6768     {
6769       coding->src_object = code_conversion_save (1, coding->src_multibyte);
6770       set_buffer_internal (XBUFFER (coding->src_object));
6771       if (STRINGP (src_object))
6772         insert_from_string (src_object, from, from_byte, chars, bytes, 0);
6773       else if (BUFFERP (src_object))
6774         insert_from_buffer (XBUFFER (src_object), from, chars, 0);
6775       else
6776         insert_1_both (coding->source + from, chars, bytes, 0, 0, 0);
6777
6778       if (EQ (src_object, dst_object))
6779         {
6780           set_buffer_internal (XBUFFER (src_object));
6781           saved_pt = PT, saved_pt_byte = PT_BYTE;
6782           del_range_both (from, from_byte, to, to_byte, 1);
6783           set_buffer_internal (XBUFFER (coding->src_object));
6784         }
6785
6786       call2 (CODING_ATTR_PRE_WRITE (attrs),
6787              make_number (BEG), make_number (Z));
6788       coding->src_object = Fcurrent_buffer ();
6789       if (BEG != GPT)
6790         move_gap_both (BEG, BEG_BYTE);
6791       coding->src_chars = Z - BEG;
6792       coding->src_bytes = Z_BYTE - BEG_BYTE;
6793       coding->src_pos = BEG;
6794       coding->src_pos_byte = BEG_BYTE;
6795       coding->src_multibyte = Z < Z_BYTE;
6796     }
6797   else if (STRINGP (src_object))
6798     {
6799       code_conversion_save (0, 0);
6800       coding->src_pos = from;
6801       coding->src_pos_byte = from_byte;
6802     }
6803   else if (BUFFERP (src_object))
6804     {
6805       code_conversion_save (0, 0);
6806       set_buffer_internal (XBUFFER (src_object));
6807       if (EQ (src_object, dst_object))
6808         {
6809           saved_pt = PT, saved_pt_byte = PT_BYTE;
6810           coding->src_object = del_range_1 (from, to, 1, 1);
6811           coding->src_pos = 0;
6812           coding->src_pos_byte = 0;
6813         }
6814       else
6815         {
6816           if (from < GPT && to >= GPT)
6817             move_gap_both (from, from_byte);
6818           coding->src_pos = from;
6819           coding->src_pos_byte = from_byte;
6820         }
6821     }
6822   else
6823     code_conversion_save (0, 0);
6824
6825   if (BUFFERP (dst_object))
6826     {
6827       coding->dst_object = dst_object;
6828       if (EQ (src_object, dst_object))
6829         {
6830           coding->dst_pos = from;
6831           coding->dst_pos_byte = from_byte;
6832         }
6833       else
6834         {
6835           coding->dst_pos = BUF_PT (XBUFFER (dst_object));
6836           coding->dst_pos_byte = BUF_PT_BYTE (XBUFFER (dst_object));
6837         }
6838       coding->dst_multibyte
6839         = ! NILP (XBUFFER (dst_object)->enable_multibyte_characters);
6840     }
6841   else if (EQ (dst_object, Qt))
6842     {
6843       coding->dst_object = Qnil;
6844       coding->dst_bytes = coding->src_chars;
6845       if (coding->dst_bytes == 0)
6846         coding->dst_bytes = 1;
6847       coding->destination = (unsigned char *) xmalloc (coding->dst_bytes);
6848       coding->dst_multibyte = 0;
6849     }
6850   else
6851     {
6852       coding->dst_object = Qnil;
6853       coding->dst_multibyte = 0;
6854     }
6855
6856   encode_coding (coding);
6857
6858   if (EQ (dst_object, Qt))
6859     {
6860       if (BUFFERP (coding->dst_object))
6861         coding->dst_object = Fbuffer_string ();
6862       else
6863         {
6864           coding->dst_object
6865             = make_unibyte_string ((char *) coding->destination,
6866                                    coding->produced);
6867           xfree (coding->destination);
6868         }
6869     }
6870
6871   if (saved_pt >= 0)
6872     {
6873       /* This is the case of:
6874          (BUFFERP (src_object) && EQ (src_object, dst_object))
6875          As we have moved PT while replacing the original buffer
6876          contents, we must recover it now.  */
6877       set_buffer_internal (XBUFFER (src_object));
6878       if (saved_pt < from)
6879         TEMP_SET_PT_BOTH (saved_pt, saved_pt_byte);
6880       else if (saved_pt < from + chars)
6881         TEMP_SET_PT_BOTH (from, from_byte);
6882       else if (! NILP (current_buffer->enable_multibyte_characters))
6883         TEMP_SET_PT_BOTH (saved_pt + (coding->produced_char - chars),
6884                           saved_pt_byte + (coding->produced - bytes));
6885       else
6886         TEMP_SET_PT_BOTH (saved_pt + (coding->produced - bytes),
6887                           saved_pt_byte + (coding->produced - bytes));
6888     }
6889
6890   unbind_to (count, Qnil);
6891 }
6892
6893
6894 Lisp_Object
6895 preferred_coding_system ()
6896 {
6897   int id = coding_categories[coding_priorities[0]].id;
6898
6899   return CODING_ID_NAME (id);
6900 }
6901
6902 \f
6903 #ifdef emacs
6904 /*** 8. Emacs Lisp library functions ***/
6905
6906 DEFUN ("coding-system-p", Fcoding_system_p, Scoding_system_p, 1, 1, 0,
6907        doc: /* Return t if OBJECT is nil or a coding-system.
6908 See the documentation of `define-coding-system' for information
6909 about coding-system objects.  */)
6910      (obj)
6911      Lisp_Object obj;
6912 {
6913   return ((NILP (obj) || CODING_SYSTEM_P (obj)) ? Qt : Qnil);
6914 }
6915
6916 DEFUN ("read-non-nil-coding-system", Fread_non_nil_coding_system,
6917        Sread_non_nil_coding_system, 1, 1, 0,
6918        doc: /* Read a coding system from the minibuffer, prompting with string PROMPT.  */)
6919      (prompt)
6920      Lisp_Object prompt;
6921 {
6922   Lisp_Object val;
6923   do
6924     {
6925       val = Fcompleting_read (prompt, Vcoding_system_alist, Qnil,
6926                               Qt, Qnil, Qcoding_system_history, Qnil, Qnil);
6927     }
6928   while (SCHARS (val) == 0);
6929   return (Fintern (val, Qnil));
6930 }
6931
6932 DEFUN ("read-coding-system", Fread_coding_system, Sread_coding_system, 1, 2, 0,
6933        doc: /* Read a coding system from the minibuffer, prompting with string PROMPT.
6934 If the user enters null input, return second argument DEFAULT-CODING-SYSTEM.  */)
6935      (prompt, default_coding_system)
6936      Lisp_Object prompt, default_coding_system;
6937 {
6938   Lisp_Object val;
6939   if (SYMBOLP (default_coding_system))
6940     XSETSTRING (default_coding_system, XPNTR (SYMBOL_NAME (default_coding_system)));
6941   val = Fcompleting_read (prompt, Vcoding_system_alist, Qnil,
6942                           Qt, Qnil, Qcoding_system_history,
6943                           default_coding_system, Qnil);
6944   return (SCHARS (val) == 0 ? Qnil : Fintern (val, Qnil));
6945 }
6946
6947 DEFUN ("check-coding-system", Fcheck_coding_system, Scheck_coding_system,
6948        1, 1, 0,
6949        doc: /* Check validity of CODING-SYSTEM.
6950 If valid, return CODING-SYSTEM, else signal a `coding-system-error' error.  */)
6951   (coding_system)
6952      Lisp_Object coding_system;
6953 {
6954   CHECK_SYMBOL (coding_system);
6955   if (!NILP (Fcoding_system_p (coding_system)))
6956     return coding_system;
6957   while (1)
6958     Fsignal (Qcoding_system_error, Fcons (coding_system, Qnil));
6959 }
6960
6961 \f
6962 /* Detect how the bytes at SRC of length SRC_BYTES are encoded.  If
6963    HIGHEST is nonzero, return the coding system of the highest
6964    priority among the detected coding systems.  Otherwize return a
6965    list of detected coding systems sorted by their priorities.  If
6966    MULTIBYTEP is nonzero, it is assumed that the bytes are in correct
6967    multibyte form but contains only ASCII and eight-bit chars.
6968    Otherwise, the bytes are raw bytes.
6969
6970    CODING-SYSTEM controls the detection as below:
6971
6972    If it is nil, detect both text-format and eol-format.  If the
6973    text-format part of CODING-SYSTEM is already specified
6974    (e.g. `iso-latin-1'), detect only eol-format.  If the eol-format
6975    part of CODING-SYSTEM is already specified (e.g. `undecided-unix'),
6976    detect only text-format.  */
6977
6978 Lisp_Object
6979 detect_coding_system (src, src_chars, src_bytes, highest, multibytep,
6980                       coding_system)
6981      const unsigned char *src;
6982      int src_chars, src_bytes, highest;
6983      int multibytep;
6984      Lisp_Object coding_system;
6985 {
6986   const unsigned char *src_end = src + src_bytes;
6987   Lisp_Object attrs, eol_type;
6988   Lisp_Object val;
6989   struct coding_system coding;
6990   int id;
6991   struct coding_detection_info detect_info;
6992   enum coding_category base_category;
6993
6994   if (NILP (coding_system))
6995     coding_system = Qundecided;
6996   setup_coding_system (coding_system, &coding);
6997   attrs = CODING_ID_ATTRS (coding.id);
6998   eol_type = CODING_ID_EOL_TYPE (coding.id);
6999   coding_system = CODING_ATTR_BASE_NAME (attrs);
7000
7001   coding.source = src;
7002   coding.src_chars = src_chars;
7003   coding.src_bytes = src_bytes;
7004   coding.src_multibyte = multibytep;
7005   coding.consumed = 0;
7006   coding.mode |= CODING_MODE_LAST_BLOCK;
7007
7008   detect_info.checked = detect_info.found = detect_info.rejected = 0;
7009
7010   /* At first, detect text-format if necessary.  */
7011   base_category = XINT (CODING_ATTR_CATEGORY (attrs));
7012   if (base_category == coding_category_undecided)
7013     {
7014       enum coding_category category;
7015       struct coding_system *this;
7016       int c, i;
7017
7018       /* Skip all ASCII bytes except for a few ISO2022 controls.  */
7019       for (i = 0; src < src_end; i++, src++)
7020         {
7021           c = *src;
7022           if (c & 0x80 || (c < 0x20 && (c == ISO_CODE_ESC
7023                                         || c == ISO_CODE_SI
7024                                         || c == ISO_CODE_SO)))
7025             break;
7026         }
7027       coding.head_ascii = src - coding.source;
7028
7029       if (src < src_end)
7030         for (i = 0; i < coding_category_raw_text; i++)
7031           {
7032             category = coding_priorities[i];
7033             this = coding_categories + category;
7034
7035             if (this->id < 0)
7036               {
7037                 /* No coding system of this category is defined.  */
7038                 detect_info.rejected |= (1 << category);
7039               }
7040             else if (category >= coding_category_raw_text)
7041               continue;
7042             else if (detect_info.checked & (1 << category))
7043               {
7044                 if (highest
7045                     && (detect_info.found & (1 << category)))
7046                   break;
7047               }
7048             else
7049               {
7050                 if ((*(this->detector)) (&coding, &detect_info)
7051                     && highest
7052                     && (detect_info.found & (1 << category)))
7053                   {
7054                     if (category == coding_category_utf_16_auto)
7055                       {
7056                         if (detect_info.found & CATEGORY_MASK_UTF_16_LE)
7057                           category = coding_category_utf_16_le;
7058                         else
7059                           category = coding_category_utf_16_be;
7060                       }
7061                     break;
7062                   }
7063               }
7064           }
7065
7066       if (detect_info.rejected == CATEGORY_MASK_ANY)
7067         {
7068           detect_info.found = CATEGORY_MASK_RAW_TEXT;
7069           id = coding_categories[coding_category_raw_text].id;
7070           val = Fcons (make_number (id), Qnil);
7071         }
7072       else if (! detect_info.rejected && ! detect_info.found)
7073         {
7074           detect_info.found = CATEGORY_MASK_ANY;
7075           id = coding_categories[coding_category_undecided].id;
7076           val = Fcons (make_number (id), Qnil);
7077         }
7078       else if (highest)
7079         {
7080           if (detect_info.found)
7081             {
7082               detect_info.found = 1 << category;
7083               val = Fcons (make_number (this->id), Qnil);
7084             }
7085           else
7086             for (i = 0; i < coding_category_raw_text; i++)
7087               if (! (detect_info.rejected & (1 << coding_priorities[i])))
7088                 {
7089                   detect_info.found = 1 << coding_priorities[i];
7090                   id = coding_categories[coding_priorities[i]].id;
7091                   val = Fcons (make_number (id), Qnil);
7092                   break;
7093                 }
7094         }
7095       else
7096         {
7097           int mask = detect_info.rejected | detect_info.found;
7098           int found = 0;
7099           val = Qnil;
7100
7101           for (i = coding_category_raw_text - 1; i >= 0; i--)
7102             {
7103               category = coding_priorities[i];
7104               if (! (mask & (1 << category)))
7105                 {
7106                   found |= 1 << category;
7107                   id = coding_categories[category].id;
7108                   val = Fcons (make_number (id), val);
7109                 }
7110             }
7111           for (i = coding_category_raw_text - 1; i >= 0; i--)
7112             {
7113               category = coding_priorities[i];
7114               if (detect_info.found & (1 << category))
7115                 {
7116                   id = coding_categories[category].id;
7117                   val = Fcons (make_number (id), val);
7118                 }
7119             }
7120           detect_info.found |= found;
7121         }
7122     }
7123   else if (base_category == coding_category_utf_16_auto)
7124     {
7125       if (detect_coding_utf_16 (&coding, &detect_info))
7126         {
7127           enum coding_category category;
7128           struct coding_system *this;
7129
7130           if (detect_info.found & CATEGORY_MASK_UTF_16_LE)
7131             this = coding_categories + coding_category_utf_16_le;
7132           else if (detect_info.found & CATEGORY_MASK_UTF_16_BE)
7133             this = coding_categories + coding_category_utf_16_be;
7134           else if (detect_info.rejected & CATEGORY_MASK_UTF_16_LE_NOSIG)
7135             this = coding_categories + coding_category_utf_16_be_nosig;
7136           else
7137             this = coding_categories + coding_category_utf_16_le_nosig;
7138           val = Fcons (make_number (this->id), Qnil);
7139         }
7140     }
7141   else
7142     {
7143       detect_info.found = 1 << XINT (CODING_ATTR_CATEGORY (attrs));
7144       val = Fcons (make_number (coding.id), Qnil);
7145     }
7146
7147   /* Then, detect eol-format if necessary.  */
7148   {
7149     int normal_eol = -1, utf_16_be_eol = -1, utf_16_le_eol;
7150     Lisp_Object tail;
7151
7152     if (VECTORP (eol_type))
7153       {
7154         if (detect_info.found & ~CATEGORY_MASK_UTF_16)
7155           normal_eol = detect_eol (coding.source, src_bytes,
7156                                    coding_category_raw_text);
7157         if (detect_info.found & (CATEGORY_MASK_UTF_16_BE
7158                                  | CATEGORY_MASK_UTF_16_BE_NOSIG))
7159           utf_16_be_eol = detect_eol (coding.source, src_bytes,
7160                                       coding_category_utf_16_be);
7161         if (detect_info.found & (CATEGORY_MASK_UTF_16_LE
7162                                  | CATEGORY_MASK_UTF_16_LE_NOSIG))
7163           utf_16_le_eol = detect_eol (coding.source, src_bytes,
7164                                       coding_category_utf_16_le);
7165       }
7166     else
7167       {
7168         if (EQ (eol_type, Qunix))
7169           normal_eol = utf_16_be_eol = utf_16_le_eol = EOL_SEEN_LF;
7170         else if (EQ (eol_type, Qdos))
7171           normal_eol = utf_16_be_eol = utf_16_le_eol = EOL_SEEN_CRLF;
7172         else
7173           normal_eol = utf_16_be_eol = utf_16_le_eol = EOL_SEEN_CR;
7174       }
7175
7176     for (tail = val; CONSP (tail); tail = XCDR (tail))
7177       {
7178         enum coding_category category;
7179         int this_eol;
7180
7181         id = XINT (XCAR (tail));
7182         attrs = CODING_ID_ATTRS (id);
7183         category = XINT (CODING_ATTR_CATEGORY (attrs));
7184         eol_type = CODING_ID_EOL_TYPE (id);
7185         if (VECTORP (eol_type))
7186           {
7187             if (category == coding_category_utf_16_be
7188                 || category == coding_category_utf_16_be_nosig)
7189               this_eol = utf_16_be_eol;
7190             else if (category == coding_category_utf_16_le
7191                      || category == coding_category_utf_16_le_nosig)
7192               this_eol = utf_16_le_eol;
7193             else
7194               this_eol = normal_eol;
7195
7196             if (this_eol == EOL_SEEN_LF)
7197               XSETCAR (tail, AREF (eol_type, 0));
7198             else if (this_eol == EOL_SEEN_CRLF)
7199               XSETCAR (tail, AREF (eol_type, 1));
7200             else if (this_eol == EOL_SEEN_CR)
7201               XSETCAR (tail, AREF (eol_type, 2));
7202             else
7203               XSETCAR (tail, CODING_ID_NAME (id));
7204           }
7205         else
7206           XSETCAR (tail, CODING_ID_NAME (id));
7207       }
7208   }
7209
7210   return (highest ? XCAR (val) : val);
7211 }
7212
7213
7214 DEFUN ("detect-coding-region", Fdetect_coding_region, Sdetect_coding_region,
7215        2, 3, 0,
7216        doc: /* Detect coding system of the text in the region between START and END.
7217 Return a list of possible coding systems ordered by priority.
7218
7219 If only ASCII characters are found, it returns a list of single element
7220 `undecided' or its subsidiary coding system according to a detected
7221 end-of-line format.
7222
7223 If optional argument HIGHEST is non-nil, return the coding system of
7224 highest priority.  */)
7225      (start, end, highest)
7226      Lisp_Object start, end, highest;
7227 {
7228   int from, to;
7229   int from_byte, to_byte;
7230
7231   CHECK_NUMBER_COERCE_MARKER (start);
7232   CHECK_NUMBER_COERCE_MARKER (end);
7233
7234   validate_region (&start, &end);
7235   from = XINT (start), to = XINT (end);
7236   from_byte = CHAR_TO_BYTE (from);
7237   to_byte = CHAR_TO_BYTE (to);
7238
7239   if (from < GPT && to >= GPT)
7240     move_gap_both (to, to_byte);
7241
7242   return detect_coding_system (BYTE_POS_ADDR (from_byte),
7243                                to - from, to_byte - from_byte,
7244                                !NILP (highest),
7245                                !NILP (current_buffer
7246                                       ->enable_multibyte_characters),
7247                                Qnil);
7248 }
7249
7250 DEFUN ("detect-coding-string", Fdetect_coding_string, Sdetect_coding_string,
7251        1, 2, 0,
7252        doc: /* Detect coding system of the text in STRING.
7253 Return a list of possible coding systems ordered by priority.
7254
7255 If only ASCII characters are found, it returns a list of single element
7256 `undecided' or its subsidiary coding system according to a detected
7257 end-of-line format.
7258
7259 If optional argument HIGHEST is non-nil, return the coding system of
7260 highest priority.  */)
7261      (string, highest)
7262      Lisp_Object string, highest;
7263 {
7264   CHECK_STRING (string);
7265
7266   return detect_coding_system (SDATA (string),
7267                                SCHARS (string), SBYTES (string),
7268                                !NILP (highest), STRING_MULTIBYTE (string),
7269                                Qnil);
7270 }
7271
7272
7273 static INLINE int
7274 char_encodable_p (c, attrs)
7275      int c;
7276      Lisp_Object attrs;
7277 {
7278   Lisp_Object tail;
7279   struct charset *charset;
7280   Lisp_Object translation_table;
7281
7282   translation_table = CODING_ATTR_TRANS_TBL (attrs);
7283   if (! NILP (translation_table))
7284     c = translate_char (translation_table, c);
7285   for (tail = CODING_ATTR_CHARSET_LIST (attrs);
7286        CONSP (tail); tail = XCDR (tail))
7287     {
7288       charset = CHARSET_FROM_ID (XINT (XCAR (tail)));
7289       if (CHAR_CHARSET_P (c, charset))
7290         break;
7291     }
7292   return (! NILP (tail));
7293 }
7294
7295
7296 /* Return a list of coding systems that safely encode the text between
7297    START and END.  If EXCLUDE is non-nil, it is a list of coding
7298    systems not to check.  The returned list doesn't contain any such
7299    coding systems.  In any case, if the text contains only ASCII or is
7300    unibyte, return t.  */
7301
7302 DEFUN ("find-coding-systems-region-internal",
7303        Ffind_coding_systems_region_internal,
7304        Sfind_coding_systems_region_internal, 2, 3, 0,
7305        doc: /* Internal use only.  */)
7306      (start, end, exclude)
7307      Lisp_Object start, end, exclude;
7308 {
7309   Lisp_Object coding_attrs_list, safe_codings;
7310   EMACS_INT start_byte, end_byte;
7311   const unsigned char *p, *pbeg, *pend;
7312   int c;
7313   Lisp_Object tail, elt;
7314
7315   if (STRINGP (start))
7316     {
7317       if (!STRING_MULTIBYTE (start)
7318           || SCHARS (start) == SBYTES (start))
7319         return Qt;
7320       start_byte = 0;
7321       end_byte = SBYTES (start);
7322     }
7323   else
7324     {
7325       CHECK_NUMBER_COERCE_MARKER (start);
7326       CHECK_NUMBER_COERCE_MARKER (end);
7327       if (XINT (start) < BEG || XINT (end) > Z || XINT (start) > XINT (end))
7328         args_out_of_range (start, end);
7329       if (NILP (current_buffer->enable_multibyte_characters))
7330         return Qt;
7331       start_byte = CHAR_TO_BYTE (XINT (start));
7332       end_byte = CHAR_TO_BYTE (XINT (end));
7333       if (XINT (end) - XINT (start) == end_byte - start_byte)
7334         return Qt;
7335
7336       if (XINT (start) < GPT && XINT (end) > GPT)
7337         {
7338           if ((GPT - XINT (start)) < (XINT (end) - GPT))
7339             move_gap_both (XINT (start), start_byte);
7340           else
7341             move_gap_both (XINT (end), end_byte);
7342         }
7343     }
7344
7345   coding_attrs_list = Qnil;
7346   for (tail = Vcoding_system_list; CONSP (tail); tail = XCDR (tail))
7347     if (NILP (exclude)
7348         || NILP (Fmemq (XCAR (tail), exclude)))
7349       {
7350         Lisp_Object attrs;
7351
7352         attrs = AREF (CODING_SYSTEM_SPEC (XCAR (tail)), 0);
7353         if (EQ (XCAR (tail), CODING_ATTR_BASE_NAME (attrs))
7354             && ! EQ (CODING_ATTR_TYPE (attrs), Qundecided))
7355           {
7356             ASET (attrs, coding_attr_trans_tbl,
7357                   get_translation_table (attrs, 1));
7358             coding_attrs_list = Fcons (attrs, coding_attrs_list);
7359           }
7360       }
7361
7362   if (STRINGP (start))
7363     p = pbeg = SDATA (start);
7364   else
7365     p = pbeg = BYTE_POS_ADDR (start_byte);
7366   pend = p + (end_byte - start_byte);
7367
7368   while (p < pend && ASCII_BYTE_P (*p)) p++;
7369   while (p < pend && ASCII_BYTE_P (*(pend - 1))) pend--;
7370
7371   while (p < pend)
7372     {
7373       if (ASCII_BYTE_P (*p))
7374         p++;
7375       else
7376         {
7377           c = STRING_CHAR_ADVANCE (p);
7378
7379           charset_map_loaded = 0;
7380           for (tail = coding_attrs_list; CONSP (tail);)
7381             {
7382               elt = XCAR (tail);
7383               if (NILP (elt))
7384                 tail = XCDR (tail);
7385               else if (char_encodable_p (c, elt))
7386                 tail = XCDR (tail);
7387               else if (CONSP (XCDR (tail)))
7388                 {
7389                   XSETCAR (tail, XCAR (XCDR (tail)));
7390                   XSETCDR (tail, XCDR (XCDR (tail)));
7391                 }
7392               else
7393                 {
7394                   XSETCAR (tail, Qnil);
7395                   tail = XCDR (tail);
7396                 }
7397             }
7398           if (charset_map_loaded)
7399             {
7400               EMACS_INT p_offset = p - pbeg, pend_offset = pend - pbeg;
7401
7402               if (STRINGP (start))
7403                 pbeg = SDATA (start);
7404               else
7405                 pbeg = BYTE_POS_ADDR (start_byte);
7406               p = pbeg + p_offset;
7407               pend = pbeg + pend_offset;
7408             }
7409         }
7410     }
7411
7412   safe_codings = Qnil;
7413   for (tail = coding_attrs_list; CONSP (tail); tail = XCDR (tail))
7414     if (! NILP (XCAR (tail)))
7415       safe_codings = Fcons (CODING_ATTR_BASE_NAME (XCAR (tail)), safe_codings);
7416
7417   return safe_codings;
7418 }
7419
7420
7421 DEFUN ("unencodable-char-position", Funencodable_char_position,
7422        Sunencodable_char_position, 3, 5, 0,
7423        doc: /*
7424 Return position of first un-encodable character in a region.
7425 START and END specfiy the region and CODING-SYSTEM specifies the
7426 encoding to check.  Return nil if CODING-SYSTEM does encode the region.
7427
7428 If optional 4th argument COUNT is non-nil, it specifies at most how
7429 many un-encodable characters to search.  In this case, the value is a
7430 list of positions.
7431
7432 If optional 5th argument STRING is non-nil, it is a string to search
7433 for un-encodable characters.  In that case, START and END are indexes
7434 to the string.  */)
7435      (start, end, coding_system, count, string)
7436      Lisp_Object start, end, coding_system, count, string;
7437 {
7438   int n;
7439   struct coding_system coding;
7440   Lisp_Object attrs, charset_list, translation_table;
7441   Lisp_Object positions;
7442   int from, to;
7443   const unsigned char *p, *stop, *pend;
7444   int ascii_compatible;
7445
7446   setup_coding_system (Fcheck_coding_system (coding_system), &coding);
7447   attrs = CODING_ID_ATTRS (coding.id);
7448   if (EQ (CODING_ATTR_TYPE (attrs), Qraw_text))
7449     return Qnil;
7450   ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
7451   charset_list = CODING_ATTR_CHARSET_LIST (attrs);
7452   translation_table = get_translation_table (attrs, 1);
7453
7454   if (NILP (string))
7455     {
7456       validate_region (&start, &end);
7457       from = XINT (start);
7458       to = XINT (end);
7459       if (NILP (current_buffer->enable_multibyte_characters)
7460           || (ascii_compatible
7461               && (to - from) == (CHAR_TO_BYTE (to) - (CHAR_TO_BYTE (from)))))
7462         return Qnil;
7463       p = CHAR_POS_ADDR (from);
7464       pend = CHAR_POS_ADDR (to);
7465       if (from < GPT && to >= GPT)
7466         stop = GPT_ADDR;
7467       else
7468         stop = pend;
7469     }
7470   else
7471     {
7472       CHECK_STRING (string);
7473       CHECK_NATNUM (start);
7474       CHECK_NATNUM (end);
7475       from = XINT (start);
7476       to = XINT (end);
7477       if (from > to
7478           || to > SCHARS (string))
7479         args_out_of_range_3 (string, start, end);
7480       if (! STRING_MULTIBYTE (string))
7481         return Qnil;
7482       p = SDATA (string) + string_char_to_byte (string, from);
7483       stop = pend = SDATA (string) + string_char_to_byte (string, to);
7484       if (ascii_compatible && (to - from) == (pend - p))
7485         return Qnil;
7486     }
7487
7488   if (NILP (count))
7489     n = 1;
7490   else
7491     {
7492       CHECK_NATNUM (count);
7493       n = XINT (count);
7494     }
7495
7496   positions = Qnil;
7497   while (1)
7498     {
7499       int c;
7500
7501       if (ascii_compatible)
7502         while (p < stop && ASCII_BYTE_P (*p))
7503           p++, from++;
7504       if (p >= stop)
7505         {
7506           if (p >= pend)
7507             break;
7508           stop = pend;
7509           p = GAP_END_ADDR;
7510         }
7511
7512       c = STRING_CHAR_ADVANCE (p);
7513       if (! (ASCII_CHAR_P (c) && ascii_compatible)
7514           && ! char_charset (translate_char (translation_table, c),
7515                              charset_list, NULL))
7516         {
7517           positions = Fcons (make_number (from), positions);
7518           n--;
7519           if (n == 0)
7520             break;
7521         }
7522
7523       from++;
7524     }
7525
7526   return (NILP (count) ? Fcar (positions) : Fnreverse (positions));
7527 }
7528
7529
7530 DEFUN ("check-coding-systems-region", Fcheck_coding_systems_region,
7531        Scheck_coding_systems_region, 3, 3, 0,
7532        doc: /* Check if the region is encodable by coding systems.
7533
7534 START and END are buffer positions specifying the region.
7535 CODING-SYSTEM-LIST is a list of coding systems to check.
7536
7537 The value is an alist ((CODING-SYSTEM POS0 POS1 ...) ...), where
7538 CODING-SYSTEM is a member of CODING-SYSTEM-LIst and can't encode the
7539 whole region, POS0, POS1, ... are buffer positions where non-encodable
7540 characters are found.
7541
7542 If all coding systems in CODING-SYSTEM-LIST can encode the region, the
7543 value is nil.
7544
7545 START may be a string.  In that case, check if the string is
7546 encodable, and the value contains indices to the string instead of
7547 buffer positions.  END is ignored.  */)
7548      (start, end, coding_system_list)
7549      Lisp_Object start, end, coding_system_list;
7550 {
7551   Lisp_Object list;
7552   EMACS_INT start_byte, end_byte;
7553   int pos;
7554   const unsigned char *p, *pbeg, *pend;
7555   int c;
7556   Lisp_Object tail, elt, attrs;
7557
7558   if (STRINGP (start))
7559     {
7560       if (!STRING_MULTIBYTE (start)
7561           && SCHARS (start) != SBYTES (start))
7562         return Qnil;
7563       start_byte = 0;
7564       end_byte = SBYTES (start);
7565       pos = 0;
7566     }
7567   else
7568     {
7569       CHECK_NUMBER_COERCE_MARKER (start);
7570       CHECK_NUMBER_COERCE_MARKER (end);
7571       if (XINT (start) < BEG || XINT (end) > Z || XINT (start) > XINT (end))
7572         args_out_of_range (start, end);
7573       if (NILP (current_buffer->enable_multibyte_characters))
7574         return Qnil;
7575       start_byte = CHAR_TO_BYTE (XINT (start));
7576       end_byte = CHAR_TO_BYTE (XINT (end));
7577       if (XINT (end) - XINT (start) == end_byte - start_byte)
7578         return Qt;
7579
7580       if (XINT (start) < GPT && XINT (end) > GPT)
7581         {
7582           if ((GPT - XINT (start)) < (XINT (end) - GPT))
7583             move_gap_both (XINT (start), start_byte);
7584           else
7585             move_gap_both (XINT (end), end_byte);
7586         }
7587       pos = XINT (start);
7588     }
7589
7590   list = Qnil;
7591   for (tail = coding_system_list; CONSP (tail); tail = XCDR (tail))
7592     {
7593       elt = XCAR (tail);
7594       attrs = AREF (CODING_SYSTEM_SPEC (elt), 0);
7595       ASET (attrs, coding_attr_trans_tbl, get_translation_table (attrs, 1));
7596       list = Fcons (Fcons (elt, Fcons (attrs, Qnil)), list);
7597     }
7598
7599   if (STRINGP (start))
7600     p = pbeg = SDATA (start);
7601   else
7602     p = pbeg = BYTE_POS_ADDR (start_byte);
7603   pend = p + (end_byte - start_byte);
7604
7605   while (p < pend && ASCII_BYTE_P (*p)) p++, pos++;
7606   while (p < pend && ASCII_BYTE_P (*(pend - 1))) pend--;
7607
7608   while (p < pend)
7609     {
7610       if (ASCII_BYTE_P (*p))
7611         p++;
7612       else
7613         {
7614           c = STRING_CHAR_ADVANCE (p);
7615
7616           charset_map_loaded = 0;
7617           for (tail = list; CONSP (tail); tail = XCDR (tail))
7618             {
7619               elt = XCDR (XCAR (tail));
7620               if (! char_encodable_p (c, XCAR (elt)))
7621                 XSETCDR (elt, Fcons (make_number (pos), XCDR (elt)));
7622             }
7623           if (charset_map_loaded)
7624             {
7625               EMACS_INT p_offset = p - pbeg, pend_offset = pend - pbeg;
7626
7627               if (STRINGP (start))
7628                 pbeg = SDATA (start);
7629               else
7630                 pbeg = BYTE_POS_ADDR (start_byte);
7631               p = pbeg + p_offset;
7632               pend = pbeg + pend_offset;
7633             }
7634         }
7635       pos++;
7636     }
7637
7638   tail = list;
7639   list = Qnil;
7640   for (; CONSP (tail); tail = XCDR (tail))
7641     {
7642       elt = XCAR (tail);
7643       if (CONSP (XCDR (XCDR (elt))))
7644         list = Fcons (Fcons (XCAR (elt), Fnreverse (XCDR (XCDR (elt)))),
7645                       list);
7646     }
7647
7648   return list;
7649 }
7650
7651
7652 Lisp_Object
7653 code_convert_region (start, end, coding_system, dst_object, encodep, norecord)
7654      Lisp_Object start, end, coding_system, dst_object;
7655      int encodep, norecord;
7656 {
7657   struct coding_system coding;
7658   EMACS_INT from, from_byte, to, to_byte;
7659   Lisp_Object src_object;
7660
7661   CHECK_NUMBER_COERCE_MARKER (start);
7662   CHECK_NUMBER_COERCE_MARKER (end);
7663   if (NILP (coding_system))
7664     coding_system = Qno_conversion;
7665   else
7666     CHECK_CODING_SYSTEM (coding_system);
7667   src_object = Fcurrent_buffer ();
7668   if (NILP (dst_object))
7669     dst_object = src_object;
7670   else if (! EQ (dst_object, Qt))
7671     CHECK_BUFFER (dst_object);
7672
7673   validate_region (&start, &end);
7674   from = XFASTINT (start);
7675   from_byte = CHAR_TO_BYTE (from);
7676   to = XFASTINT (end);
7677   to_byte = CHAR_TO_BYTE (to);
7678
7679   setup_coding_system (coding_system, &coding);
7680   coding.mode |= CODING_MODE_LAST_BLOCK;
7681
7682   if (encodep)
7683     encode_coding_object (&coding, src_object, from, from_byte, to, to_byte,
7684                           dst_object);
7685   else
7686     decode_coding_object (&coding, src_object, from, from_byte, to, to_byte,
7687                           dst_object);
7688   if (! norecord)
7689     Vlast_coding_system_used = CODING_ID_NAME (coding.id);
7690
7691   return (BUFFERP (dst_object)
7692           ? make_number (coding.produced_char)
7693           : coding.dst_object);
7694 }
7695
7696
7697 DEFUN ("decode-coding-region", Fdecode_coding_region, Sdecode_coding_region,
7698        3, 4, "r\nzCoding system: ",
7699        doc: /* Decode the current region from the specified coding system.
7700 When called from a program, takes four arguments:
7701         START, END, CODING-SYSTEM, and DESTINATION.
7702 START and END are buffer positions.
7703
7704 Optional 4th arguments DESTINATION specifies where the decoded text goes.
7705 If nil, the region between START and END is replace by the decoded text.
7706 If buffer, the decoded text is inserted in the buffer.
7707 If t, the decoded text is returned.
7708
7709 This function sets `last-coding-system-used' to the precise coding system
7710 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
7711 not fully specified.)
7712 It returns the length of the decoded text.  */)
7713      (start, end, coding_system, destination)
7714      Lisp_Object start, end, coding_system, destination;
7715 {
7716   return code_convert_region (start, end, coding_system, destination, 0, 0);
7717 }
7718
7719 DEFUN ("encode-coding-region", Fencode_coding_region, Sencode_coding_region,
7720        3, 4, "r\nzCoding system: ",
7721        doc: /* Encode the current region by specified coding system.
7722 When called from a program, takes three arguments:
7723 START, END, and CODING-SYSTEM.  START and END are buffer positions.
7724
7725 Optional 4th arguments DESTINATION specifies where the encoded text goes.
7726 If nil, the region between START and END is replace by the encoded text.
7727 If buffer, the encoded text is inserted in the buffer.
7728 If t, the encoded text is returned.
7729
7730 This function sets `last-coding-system-used' to the precise coding system
7731 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
7732 not fully specified.)
7733 It returns the length of the encoded text.  */)
7734   (start, end, coding_system, destination)
7735      Lisp_Object start, end, coding_system, destination;
7736 {
7737   return code_convert_region (start, end, coding_system, destination, 1, 0);
7738 }
7739
7740 Lisp_Object
7741 code_convert_string (string, coding_system, dst_object,
7742                      encodep, nocopy, norecord)
7743      Lisp_Object string, coding_system, dst_object;
7744      int encodep, nocopy, norecord;
7745 {
7746   struct coding_system coding;
7747   EMACS_INT chars, bytes;
7748
7749   CHECK_STRING (string);
7750   if (NILP (coding_system))
7751     {
7752       if (! norecord)
7753         Vlast_coding_system_used = Qno_conversion;
7754       if (NILP (dst_object))
7755         return (nocopy ? Fcopy_sequence (string) : string);
7756     }
7757
7758   if (NILP (coding_system))
7759     coding_system = Qno_conversion;
7760   else
7761     CHECK_CODING_SYSTEM (coding_system);
7762   if (NILP (dst_object))
7763     dst_object = Qt;
7764   else if (! EQ (dst_object, Qt))
7765     CHECK_BUFFER (dst_object);
7766
7767   setup_coding_system (coding_system, &coding);
7768   coding.mode |= CODING_MODE_LAST_BLOCK;
7769   chars = SCHARS (string);
7770   bytes = SBYTES (string);
7771   if (encodep)
7772     encode_coding_object (&coding, string, 0, 0, chars, bytes, dst_object);
7773   else
7774     decode_coding_object (&coding, string, 0, 0, chars, bytes, dst_object);
7775   if (! norecord)
7776     Vlast_coding_system_used = CODING_ID_NAME (coding.id);
7777
7778   return (BUFFERP (dst_object)
7779           ? make_number (coding.produced_char)
7780           : coding.dst_object);
7781 }
7782
7783
7784 /* Encode or decode STRING according to CODING_SYSTEM.
7785    Do not set Vlast_coding_system_used.
7786
7787    This function is called only from macros DECODE_FILE and
7788    ENCODE_FILE, thus we ignore character composition.  */
7789
7790 Lisp_Object
7791 code_convert_string_norecord (string, coding_system, encodep)
7792      Lisp_Object string, coding_system;
7793      int encodep;
7794 {
7795   return code_convert_string (string, coding_system, Qt, encodep, 0, 1);
7796 }
7797
7798
7799 DEFUN ("decode-coding-string", Fdecode_coding_string, Sdecode_coding_string,
7800        2, 4, 0,
7801        doc: /* Decode STRING which is encoded in CODING-SYSTEM, and return the result.
7802
7803 Optional third arg NOCOPY non-nil means it is OK to return STRING itself
7804 if the decoding operation is trivial.
7805
7806 Optional fourth arg BUFFER non-nil meant that the decoded text is
7807 inserted in BUFFER instead of returned as a string.  In this case,
7808 the return value is BUFFER.
7809
7810 This function sets `last-coding-system-used' to the precise coding system
7811 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
7812 not fully specified.  */)
7813   (string, coding_system, nocopy, buffer)
7814      Lisp_Object string, coding_system, nocopy, buffer;
7815 {
7816   return code_convert_string (string, coding_system, buffer,
7817                               0, ! NILP (nocopy), 0);
7818 }
7819
7820 DEFUN ("encode-coding-string", Fencode_coding_string, Sencode_coding_string,
7821        2, 4, 0,
7822        doc: /* Encode STRING to CODING-SYSTEM, and return the result.
7823
7824 Optional third arg NOCOPY non-nil means it is OK to return STRING
7825 itself if the encoding operation is trivial.
7826
7827 Optional fourth arg BUFFER non-nil meant that the encoded text is
7828 inserted in BUFFER instead of returned as a string.  In this case,
7829 the return value is BUFFER.
7830
7831 This function sets `last-coding-system-used' to the precise coding system
7832 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
7833 not fully specified.)  */)
7834      (string, coding_system, nocopy, buffer)
7835      Lisp_Object string, coding_system, nocopy, buffer;
7836 {
7837   return code_convert_string (string, coding_system, buffer,
7838                               1, ! NILP (nocopy), 1);
7839 }
7840
7841 \f
7842 DEFUN ("decode-sjis-char", Fdecode_sjis_char, Sdecode_sjis_char, 1, 1, 0,
7843        doc: /* Decode a Japanese character which has CODE in shift_jis encoding.
7844 Return the corresponding character.  */)
7845      (code)
7846      Lisp_Object code;
7847 {
7848   Lisp_Object spec, attrs, val;
7849   struct charset *charset_roman, *charset_kanji, *charset_kana, *charset;
7850   int c;
7851
7852   CHECK_NATNUM (code);
7853   c = XFASTINT (code);
7854   CHECK_CODING_SYSTEM_GET_SPEC (Vsjis_coding_system, spec);
7855   attrs = AREF (spec, 0);
7856
7857   if (ASCII_BYTE_P (c)
7858       && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
7859     return code;
7860
7861   val = CODING_ATTR_CHARSET_LIST (attrs);
7862   charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
7863   charset_kana = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
7864   charset_kanji = CHARSET_FROM_ID (XINT (XCAR (val)));
7865
7866   if (c <= 0x7F)
7867     charset = charset_roman;
7868   else if (c >= 0xA0 && c < 0xDF)
7869     {
7870       charset = charset_kana;
7871       c -= 0x80;
7872     }
7873   else
7874     {
7875       int s1 = c >> 8, s2 = c & 0xFF;
7876
7877       if (s1 < 0x81 || (s1 > 0x9F && s1 < 0xE0) || s1 > 0xEF
7878           || s2 < 0x40 || s2 == 0x7F || s2 > 0xFC)
7879         error ("Invalid code: %d", code);
7880       SJIS_TO_JIS (c);
7881       charset = charset_kanji;
7882     }
7883   c = DECODE_CHAR (charset, c);
7884   if (c < 0)
7885     error ("Invalid code: %d", code);
7886   return make_number (c);
7887 }
7888
7889
7890 DEFUN ("encode-sjis-char", Fencode_sjis_char, Sencode_sjis_char, 1, 1, 0,
7891        doc: /* Encode a Japanese character CHAR to shift_jis encoding.
7892 Return the corresponding code in SJIS.  */)
7893      (ch)
7894     Lisp_Object ch;
7895 {
7896   Lisp_Object spec, attrs, charset_list;
7897   int c;
7898   struct charset *charset;
7899   unsigned code;
7900
7901   CHECK_CHARACTER (ch);
7902   c = XFASTINT (ch);
7903   CHECK_CODING_SYSTEM_GET_SPEC (Vsjis_coding_system, spec);
7904   attrs = AREF (spec, 0);
7905
7906   if (ASCII_CHAR_P (c)
7907       && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
7908     return ch;
7909
7910   charset_list = CODING_ATTR_CHARSET_LIST (attrs);
7911   charset = char_charset (c, charset_list, &code);
7912   if (code == CHARSET_INVALID_CODE (charset))
7913     error ("Can't encode by shift_jis encoding: %d", c);
7914   JIS_TO_SJIS (code);
7915
7916   return make_number (code);
7917 }
7918
7919 DEFUN ("decode-big5-char", Fdecode_big5_char, Sdecode_big5_char, 1, 1, 0,
7920        doc: /* Decode a Big5 character which has CODE in BIG5 coding system.
7921 Return the corresponding character.  */)
7922      (code)
7923      Lisp_Object code;
7924 {
7925   Lisp_Object spec, attrs, val;
7926   struct charset *charset_roman, *charset_big5, *charset;
7927   int c;
7928
7929   CHECK_NATNUM (code);
7930   c = XFASTINT (code);
7931   CHECK_CODING_SYSTEM_GET_SPEC (Vbig5_coding_system, spec);
7932   attrs = AREF (spec, 0);
7933
7934   if (ASCII_BYTE_P (c)
7935       && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
7936     return code;
7937
7938   val = CODING_ATTR_CHARSET_LIST (attrs);
7939   charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
7940   charset_big5 = CHARSET_FROM_ID (XINT (XCAR (val)));
7941
7942   if (c <= 0x7F)
7943     charset = charset_roman;
7944   else
7945     {
7946       int b1 = c >> 8, b2 = c & 0x7F;
7947       if (b1 < 0xA1 || b1 > 0xFE
7948           || b2 < 0x40 || (b2 > 0x7E && b2 < 0xA1) || b2 > 0xFE)
7949         error ("Invalid code: %d", code);
7950       charset = charset_big5;
7951     }
7952   c = DECODE_CHAR (charset, (unsigned )c);
7953   if (c < 0)
7954     error ("Invalid code: %d", code);
7955   return make_number (c);
7956 }
7957
7958 DEFUN ("encode-big5-char", Fencode_big5_char, Sencode_big5_char, 1, 1, 0,
7959        doc: /* Encode the Big5 character CHAR to BIG5 coding system.
7960 Return the corresponding character code in Big5.  */)
7961      (ch)
7962      Lisp_Object ch;
7963 {
7964   Lisp_Object spec, attrs, charset_list;
7965   struct charset *charset;
7966   int c;
7967   unsigned code;
7968
7969   CHECK_CHARACTER (ch);
7970   c = XFASTINT (ch);
7971   CHECK_CODING_SYSTEM_GET_SPEC (Vbig5_coding_system, spec);
7972   attrs = AREF (spec, 0);
7973   if (ASCII_CHAR_P (c)
7974       && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
7975     return ch;
7976
7977   charset_list = CODING_ATTR_CHARSET_LIST (attrs);
7978   charset = char_charset (c, charset_list, &code);
7979   if (code == CHARSET_INVALID_CODE (charset))
7980     error ("Can't encode by Big5 encoding: %d", c);
7981
7982   return make_number (code);
7983 }
7984
7985 \f
7986 DEFUN ("set-terminal-coding-system-internal",
7987        Fset_terminal_coding_system_internal,
7988        Sset_terminal_coding_system_internal, 1, 1, 0,
7989        doc: /* Internal use only.  */)
7990      (coding_system)
7991      Lisp_Object coding_system;
7992 {
7993   CHECK_SYMBOL (coding_system);
7994   setup_coding_system (Fcheck_coding_system (coding_system),
7995                         &terminal_coding);
7996
7997   /* We had better not send unsafe characters to terminal.  */
7998   terminal_coding.mode |= CODING_MODE_SAFE_ENCODING;
7999   /* Characer composition should be disabled.  */
8000   terminal_coding.common_flags &= ~CODING_ANNOTATE_COMPOSITION_MASK;
8001   terminal_coding.src_multibyte = 1;
8002   terminal_coding.dst_multibyte = 0;
8003   return Qnil;
8004 }
8005
8006 DEFUN ("set-safe-terminal-coding-system-internal",
8007        Fset_safe_terminal_coding_system_internal,
8008        Sset_safe_terminal_coding_system_internal, 1, 1, 0,
8009        doc: /* Internal use only.  */)
8010      (coding_system)
8011      Lisp_Object coding_system;
8012 {
8013   CHECK_SYMBOL (coding_system);
8014   setup_coding_system (Fcheck_coding_system (coding_system),
8015                        &safe_terminal_coding);
8016   /* Characer composition should be disabled.  */
8017   safe_terminal_coding.common_flags &= ~CODING_ANNOTATE_COMPOSITION_MASK;
8018   safe_terminal_coding.src_multibyte = 1;
8019   safe_terminal_coding.dst_multibyte = 0;
8020   return Qnil;
8021 }
8022
8023 DEFUN ("terminal-coding-system",
8024        Fterminal_coding_system, Sterminal_coding_system, 0, 0, 0,
8025        doc: /* Return coding system specified for terminal output.  */)
8026      ()
8027 {
8028   return CODING_ID_NAME (terminal_coding.id);
8029 }
8030
8031 DEFUN ("set-keyboard-coding-system-internal",
8032        Fset_keyboard_coding_system_internal,
8033        Sset_keyboard_coding_system_internal, 1, 1, 0,
8034        doc: /* Internal use only.  */)
8035      (coding_system)
8036      Lisp_Object coding_system;
8037 {
8038   CHECK_SYMBOL (coding_system);
8039   setup_coding_system (Fcheck_coding_system (coding_system),
8040                        &keyboard_coding);
8041   /* Characer composition should be disabled.  */
8042   keyboard_coding.common_flags &= ~CODING_ANNOTATE_COMPOSITION_MASK;
8043   return Qnil;
8044 }
8045
8046 DEFUN ("keyboard-coding-system",
8047        Fkeyboard_coding_system, Skeyboard_coding_system, 0, 0, 0,
8048        doc: /* Return coding system specified for decoding keyboard input.  */)
8049      ()
8050 {
8051   return CODING_ID_NAME (keyboard_coding.id);
8052 }
8053
8054 \f
8055 DEFUN ("find-operation-coding-system", Ffind_operation_coding_system,
8056        Sfind_operation_coding_system,  1, MANY, 0,
8057        doc: /* Choose a coding system for an operation based on the target name.
8058 The value names a pair of coding systems: (DECODING-SYSTEM . ENCODING-SYSTEM).
8059 DECODING-SYSTEM is the coding system to use for decoding
8060 \(in case OPERATION does decoding), and ENCODING-SYSTEM is the coding system
8061 for encoding (in case OPERATION does encoding).
8062
8063 The first argument OPERATION specifies an I/O primitive:
8064   For file I/O, `insert-file-contents' or `write-region'.
8065   For process I/O, `call-process', `call-process-region', or `start-process'.
8066   For network I/O, `open-network-stream'.
8067
8068 The remaining arguments should be the same arguments that were passed
8069 to the primitive.  Depending on which primitive, one of those arguments
8070 is selected as the TARGET.  For example, if OPERATION does file I/O,
8071 whichever argument specifies the file name is TARGET.
8072
8073 TARGET has a meaning which depends on OPERATION:
8074   For file I/O, TARGET is a file name.
8075   For process I/O, TARGET is a process name.
8076   For network I/O, TARGET is a service name or a port number
8077
8078 This function looks up what specified for TARGET in,
8079 `file-coding-system-alist', `process-coding-system-alist',
8080 or `network-coding-system-alist' depending on OPERATION.
8081 They may specify a coding system, a cons of coding systems,
8082 or a function symbol to call.
8083 In the last case, we call the function with one argument,
8084 which is a list of all the arguments given to this function.
8085
8086 usage: (find-operation-coding-system OPERATION ARGUMENTS ...)  */)
8087      (nargs, args)
8088      int nargs;
8089      Lisp_Object *args;
8090 {
8091   Lisp_Object operation, target_idx, target, val;
8092   register Lisp_Object chain;
8093
8094   if (nargs < 2)
8095     error ("Too few arguments");
8096   operation = args[0];
8097   if (!SYMBOLP (operation)
8098       || !INTEGERP (target_idx = Fget (operation, Qtarget_idx)))
8099     error ("Invalid first arguement");
8100   if (nargs < 1 + XINT (target_idx))
8101     error ("Too few arguments for operation: %s",
8102            SDATA (SYMBOL_NAME (operation)));
8103   target = args[XINT (target_idx) + 1];
8104   if (!(STRINGP (target)
8105         || (EQ (operation, Qopen_network_stream) && INTEGERP (target))))
8106     error ("Invalid %dth argument", XINT (target_idx) + 1);
8107
8108   chain = ((EQ (operation, Qinsert_file_contents)
8109             || EQ (operation, Qwrite_region))
8110            ? Vfile_coding_system_alist
8111            : (EQ (operation, Qopen_network_stream)
8112               ? Vnetwork_coding_system_alist
8113               : Vprocess_coding_system_alist));
8114   if (NILP (chain))
8115     return Qnil;
8116
8117   for (; CONSP (chain); chain = XCDR (chain))
8118     {
8119       Lisp_Object elt;
8120
8121       elt = XCAR (chain);
8122       if (CONSP (elt)
8123           && ((STRINGP (target)
8124                && STRINGP (XCAR (elt))
8125                && fast_string_match (XCAR (elt), target) >= 0)
8126               || (INTEGERP (target) && EQ (target, XCAR (elt)))))
8127         {
8128           val = XCDR (elt);
8129           /* Here, if VAL is both a valid coding system and a valid
8130              function symbol, we return VAL as a coding system.  */
8131           if (CONSP (val))
8132             return val;
8133           if (! SYMBOLP (val))
8134             return Qnil;
8135           if (! NILP (Fcoding_system_p (val)))
8136             return Fcons (val, val);
8137           if (! NILP (Ffboundp (val)))
8138             {
8139               val = call1 (val, Flist (nargs, args));
8140               if (CONSP (val))
8141                 return val;
8142               if (SYMBOLP (val) && ! NILP (Fcoding_system_p (val)))
8143                 return Fcons (val, val);
8144             }
8145           return Qnil;
8146         }
8147     }
8148   return Qnil;
8149 }
8150
8151 DEFUN ("set-coding-system-priority", Fset_coding_system_priority,
8152        Sset_coding_system_priority, 0, MANY, 0,
8153        doc: /* Assign higher priority to the coding systems given as arguments.
8154 If multiple coding systems belongs to the same category,
8155 all but the first one are ignored.
8156
8157 usage: (set-coding-system-priority ...)  */)
8158      (nargs, args)
8159      int nargs;
8160      Lisp_Object *args;
8161 {
8162   int i, j;
8163   int changed[coding_category_max];
8164   enum coding_category priorities[coding_category_max];
8165
8166   bzero (changed, sizeof changed);
8167
8168   for (i = j = 0; i < nargs; i++)
8169     {
8170       enum coding_category category;
8171       Lisp_Object spec, attrs;
8172
8173       CHECK_CODING_SYSTEM_GET_SPEC (args[i], spec);
8174       attrs = AREF (spec, 0);
8175       category = XINT (CODING_ATTR_CATEGORY (attrs));
8176       if (changed[category])
8177         /* Ignore this coding system because a coding system of the
8178            same category already had a higher priority.  */
8179         continue;
8180       changed[category] = 1;
8181       priorities[j++] = category;
8182       if (coding_categories[category].id >= 0
8183           && ! EQ (args[i], CODING_ID_NAME (coding_categories[category].id)))
8184         setup_coding_system (args[i], &coding_categories[category]);
8185       Fset (AREF (Vcoding_category_table, category), args[i]);
8186     }
8187
8188   /* Now we have decided top J priorities.  Reflect the order of the
8189      original priorities to the remaining priorities.  */
8190
8191   for (i = j, j = 0; i < coding_category_max; i++, j++)
8192     {
8193       while (j < coding_category_max
8194              && changed[coding_priorities[j]])
8195         j++;
8196       if (j == coding_category_max)
8197         abort ();
8198       priorities[i] = coding_priorities[j];
8199     }
8200
8201   bcopy (priorities, coding_priorities, sizeof priorities);
8202
8203   /* Update `coding-category-list'.  */
8204   Vcoding_category_list = Qnil;
8205   for (i = coding_category_max - 1; i >= 0; i--)
8206     Vcoding_category_list
8207       = Fcons (AREF (Vcoding_category_table, priorities[i]),
8208                Vcoding_category_list);
8209
8210   return Qnil;
8211 }
8212
8213 DEFUN ("coding-system-priority-list", Fcoding_system_priority_list,
8214        Scoding_system_priority_list, 0, 1, 0,
8215        doc: /* Return a list of coding systems ordered by their priorities.
8216 HIGHESTP non-nil means just return the highest priority one.  */)
8217      (highestp)
8218      Lisp_Object highestp;
8219 {
8220   int i;
8221   Lisp_Object val;
8222
8223   for (i = 0, val = Qnil; i < coding_category_max; i++)
8224     {
8225       enum coding_category category = coding_priorities[i];
8226       int id = coding_categories[category].id;
8227       Lisp_Object attrs;
8228
8229       if (id < 0)
8230         continue;
8231       attrs = CODING_ID_ATTRS (id);
8232       if (! NILP (highestp))
8233         return CODING_ATTR_BASE_NAME (attrs);
8234       val = Fcons (CODING_ATTR_BASE_NAME (attrs), val);
8235     }
8236   return Fnreverse (val);
8237 }
8238
8239 static char *suffixes[] = { "-unix", "-dos", "-mac" };
8240
8241 static Lisp_Object
8242 make_subsidiaries (base)
8243      Lisp_Object base;
8244 {
8245   Lisp_Object subsidiaries;
8246   int base_name_len = SBYTES (SYMBOL_NAME (base));
8247   char *buf = (char *) alloca (base_name_len + 6);
8248   int i;
8249
8250   bcopy (SDATA (SYMBOL_NAME (base)), buf, base_name_len);
8251   subsidiaries = Fmake_vector (make_number (3), Qnil);
8252   for (i = 0; i < 3; i++)
8253     {
8254       bcopy (suffixes[i], buf + base_name_len, strlen (suffixes[i]) + 1);
8255       ASET (subsidiaries, i, intern (buf));
8256     }
8257   return subsidiaries;
8258 }
8259
8260
8261 DEFUN ("define-coding-system-internal", Fdefine_coding_system_internal,
8262        Sdefine_coding_system_internal, coding_arg_max, MANY, 0,
8263        doc: /* For internal use only.
8264 usage: (define-coding-system-internal ...)  */)
8265      (nargs, args)
8266      int nargs;
8267      Lisp_Object *args;
8268 {
8269   Lisp_Object name;
8270   Lisp_Object spec_vec;         /* [ ATTRS ALIASE EOL_TYPE ] */
8271   Lisp_Object attrs;            /* Vector of attributes.  */
8272   Lisp_Object eol_type;
8273   Lisp_Object aliases;
8274   Lisp_Object coding_type, charset_list, safe_charsets;
8275   enum coding_category category;
8276   Lisp_Object tail, val;
8277   int max_charset_id = 0;
8278   int i;
8279
8280   if (nargs < coding_arg_max)
8281     goto short_args;
8282
8283   attrs = Fmake_vector (make_number (coding_attr_last_index), Qnil);
8284
8285   name = args[coding_arg_name];
8286   CHECK_SYMBOL (name);
8287   CODING_ATTR_BASE_NAME (attrs) = name;
8288
8289   val = args[coding_arg_mnemonic];
8290   if (! STRINGP (val))
8291     CHECK_CHARACTER (val);
8292   CODING_ATTR_MNEMONIC (attrs) = val;
8293
8294   coding_type = args[coding_arg_coding_type];
8295   CHECK_SYMBOL (coding_type);
8296   CODING_ATTR_TYPE (attrs) = coding_type;
8297
8298   charset_list = args[coding_arg_charset_list];
8299   if (SYMBOLP (charset_list))
8300     {
8301       if (EQ (charset_list, Qiso_2022))
8302         {
8303           if (! EQ (coding_type, Qiso_2022))
8304             error ("Invalid charset-list");
8305           charset_list = Viso_2022_charset_list;
8306         }
8307       else if (EQ (charset_list, Qemacs_mule))
8308         {
8309           if (! EQ (coding_type, Qemacs_mule))
8310             error ("Invalid charset-list");
8311           charset_list = Vemacs_mule_charset_list;
8312         }
8313       for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
8314         if (max_charset_id < XFASTINT (XCAR (tail)))
8315           max_charset_id = XFASTINT (XCAR (tail));
8316     }
8317   else
8318     {
8319       charset_list = Fcopy_sequence (charset_list);
8320       for (tail = charset_list; !NILP (tail); tail = Fcdr (tail))
8321         {
8322           struct charset *charset;
8323
8324           val = Fcar (tail);
8325           CHECK_CHARSET_GET_CHARSET (val, charset);
8326           if (EQ (coding_type, Qiso_2022)
8327               ? CHARSET_ISO_FINAL (charset) < 0
8328               : EQ (coding_type, Qemacs_mule)
8329               ? CHARSET_EMACS_MULE_ID (charset) < 0
8330               : 0)
8331             error ("Can't handle charset `%s'",
8332                    SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
8333
8334           XSETCAR (tail, make_number (charset->id));
8335           if (max_charset_id < charset->id)
8336             max_charset_id = charset->id;
8337         }
8338     }
8339   CODING_ATTR_CHARSET_LIST (attrs) = charset_list;
8340
8341   safe_charsets = Fmake_string (make_number (max_charset_id + 1),
8342                                 make_number (255));
8343   for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
8344     SSET (safe_charsets, XFASTINT (XCAR (tail)), 0);
8345   CODING_ATTR_SAFE_CHARSETS (attrs) = safe_charsets;
8346
8347   CODING_ATTR_ASCII_COMPAT (attrs) = args[coding_arg_ascii_compatible_p];
8348
8349   val = args[coding_arg_decode_translation_table];
8350   if (! CHAR_TABLE_P (val) && ! CONSP (val))
8351     CHECK_SYMBOL (val);
8352   CODING_ATTR_DECODE_TBL (attrs) = val;
8353
8354   val = args[coding_arg_encode_translation_table];
8355   if (! CHAR_TABLE_P (val) && ! CONSP (val))
8356     CHECK_SYMBOL (val);
8357   CODING_ATTR_ENCODE_TBL (attrs) = val;
8358
8359   val = args[coding_arg_post_read_conversion];
8360   CHECK_SYMBOL (val);
8361   CODING_ATTR_POST_READ (attrs) = val;
8362
8363   val = args[coding_arg_pre_write_conversion];
8364   CHECK_SYMBOL (val);
8365   CODING_ATTR_PRE_WRITE (attrs) = val;
8366
8367   val = args[coding_arg_default_char];
8368   if (NILP (val))
8369     CODING_ATTR_DEFAULT_CHAR (attrs) = make_number (' ');
8370   else
8371     {
8372       CHECK_CHARACTER (val);
8373       CODING_ATTR_DEFAULT_CHAR (attrs) = val;
8374     }
8375
8376   val = args[coding_arg_for_unibyte];
8377   CODING_ATTR_FOR_UNIBYTE (attrs) = NILP (val) ? Qnil : Qt;
8378
8379   val = args[coding_arg_plist];
8380   CHECK_LIST (val);
8381   CODING_ATTR_PLIST (attrs) = val;
8382
8383   if (EQ (coding_type, Qcharset))
8384     {
8385       /* Generate a lisp vector of 256 elements.  Each element is nil,
8386          integer, or a list of charset IDs.
8387
8388          If Nth element is nil, the byte code N is invalid in this
8389          coding system.
8390
8391          If Nth element is a number NUM, N is the first byte of a
8392          charset whose ID is NUM.
8393
8394          If Nth element is a list of charset IDs, N is the first byte
8395          of one of them.  The list is sorted by dimensions of the
8396          charsets.  A charset of smaller dimension comes firtst. */
8397       val = Fmake_vector (make_number (256), Qnil);
8398
8399       for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
8400         {
8401           struct charset *charset = CHARSET_FROM_ID (XFASTINT (XCAR (tail)));
8402           int dim = CHARSET_DIMENSION (charset);
8403           int idx = (dim - 1) * 4;
8404
8405           if (CHARSET_ASCII_COMPATIBLE_P (charset))
8406             CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
8407
8408           for (i = charset->code_space[idx];
8409                i <= charset->code_space[idx + 1]; i++)
8410             {
8411               Lisp_Object tmp, tmp2;
8412               int dim2;
8413
8414               tmp = AREF (val, i);
8415               if (NILP (tmp))
8416                 tmp = XCAR (tail);
8417               else if (NUMBERP (tmp))
8418                 {
8419                   dim2 = CHARSET_DIMENSION (CHARSET_FROM_ID (XFASTINT (tmp)));
8420                   if (dim < dim2)
8421                     tmp = Fcons (XCAR (tail), Fcons (tmp, Qnil));
8422                   else
8423                     tmp = Fcons (tmp, Fcons (XCAR (tail), Qnil));
8424                 }
8425               else
8426                 {
8427                   for (tmp2 = tmp; CONSP (tmp2); tmp2 = XCDR (tmp2))
8428                     {
8429                       dim2 = CHARSET_DIMENSION (CHARSET_FROM_ID (XFASTINT (XCAR (tmp2))));
8430                       if (dim < dim2)
8431                         break;
8432                     }
8433                   if (NILP (tmp2))
8434                     tmp = nconc2 (tmp, Fcons (XCAR (tail), Qnil));
8435                   else
8436                     {
8437                       XSETCDR (tmp2, Fcons (XCAR (tmp2), XCDR (tmp2)));
8438                       XSETCAR (tmp2, XCAR (tail));
8439                     }
8440                 }
8441               ASET (val, i, tmp);
8442             }
8443         }
8444       ASET (attrs, coding_attr_charset_valids, val);
8445       category = coding_category_charset;
8446     }
8447   else if (EQ (coding_type, Qccl))
8448     {
8449       Lisp_Object valids;
8450
8451       if (nargs < coding_arg_ccl_max)
8452         goto short_args;
8453
8454       val = args[coding_arg_ccl_decoder];
8455       CHECK_CCL_PROGRAM (val);
8456       if (VECTORP (val))
8457         val = Fcopy_sequence (val);
8458       ASET (attrs, coding_attr_ccl_decoder, val);
8459
8460       val = args[coding_arg_ccl_encoder];
8461       CHECK_CCL_PROGRAM (val);
8462       if (VECTORP (val))
8463         val = Fcopy_sequence (val);
8464       ASET (attrs, coding_attr_ccl_encoder, val);
8465
8466       val = args[coding_arg_ccl_valids];
8467       valids = Fmake_string (make_number (256), make_number (0));
8468       for (tail = val; !NILP (tail); tail = Fcdr (tail))
8469         {
8470           int from, to;
8471
8472           val = Fcar (tail);
8473           if (INTEGERP (val))
8474             {
8475               from = to = XINT (val);
8476               if (from < 0 || from > 255)
8477                 args_out_of_range_3 (val, make_number (0), make_number (255));
8478             }
8479           else
8480             {
8481               CHECK_CONS (val);
8482               CHECK_NATNUM_CAR (val);
8483               CHECK_NATNUM_CDR (val);
8484               from = XINT (XCAR (val));
8485               if (from > 255)
8486                 args_out_of_range_3 (XCAR (val),
8487                                      make_number (0), make_number (255));
8488               to = XINT (XCDR (val));
8489               if (to < from || to > 255)
8490                 args_out_of_range_3 (XCDR (val),
8491                                      XCAR (val), make_number (255));
8492             }
8493           for (i = from; i <= to; i++)
8494             SSET (valids, i, 1);
8495         }
8496       ASET (attrs, coding_attr_ccl_valids, valids);
8497
8498       category = coding_category_ccl;
8499     }
8500   else if (EQ (coding_type, Qutf_16))
8501     {
8502       Lisp_Object bom, endian;
8503
8504       CODING_ATTR_ASCII_COMPAT (attrs) = Qnil;
8505
8506       if (nargs < coding_arg_utf16_max)
8507         goto short_args;
8508
8509       bom = args[coding_arg_utf16_bom];
8510       if (! NILP (bom) && ! EQ (bom, Qt))
8511         {
8512           CHECK_CONS (bom);
8513           val = XCAR (bom);
8514           CHECK_CODING_SYSTEM (val);
8515           val = XCDR (bom);
8516           CHECK_CODING_SYSTEM (val);
8517         }
8518       ASET (attrs, coding_attr_utf_16_bom, bom);
8519
8520       endian = args[coding_arg_utf16_endian];
8521       CHECK_SYMBOL (endian);
8522       if (NILP (endian))
8523         endian = Qbig;
8524       else if (! EQ (endian, Qbig) && ! EQ (endian, Qlittle))
8525         error ("Invalid endian: %s", SDATA (SYMBOL_NAME (endian)));
8526       ASET (attrs, coding_attr_utf_16_endian, endian);
8527
8528       category = (CONSP (bom)
8529                   ? coding_category_utf_16_auto
8530                   : NILP (bom)
8531                   ? (EQ (endian, Qbig)
8532                      ? coding_category_utf_16_be_nosig
8533                      : coding_category_utf_16_le_nosig)
8534                   : (EQ (endian, Qbig)
8535                      ? coding_category_utf_16_be
8536                      : coding_category_utf_16_le));
8537     }
8538   else if (EQ (coding_type, Qiso_2022))
8539     {
8540       Lisp_Object initial, reg_usage, request, flags;
8541       int i;
8542
8543       if (nargs < coding_arg_iso2022_max)
8544         goto short_args;
8545
8546       initial = Fcopy_sequence (args[coding_arg_iso2022_initial]);
8547       CHECK_VECTOR (initial);
8548       for (i = 0; i < 4; i++)
8549         {
8550           val = Faref (initial, make_number (i));
8551           if (! NILP (val))
8552             {
8553               struct charset *charset;
8554
8555               CHECK_CHARSET_GET_CHARSET (val, charset);
8556               ASET (initial, i, make_number (CHARSET_ID (charset)));
8557               if (i == 0 && CHARSET_ASCII_COMPATIBLE_P (charset))
8558                 CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
8559             }
8560           else
8561             ASET (initial, i, make_number (-1));
8562         }
8563
8564       reg_usage = args[coding_arg_iso2022_reg_usage];
8565       CHECK_CONS (reg_usage);
8566       CHECK_NUMBER_CAR (reg_usage);
8567       CHECK_NUMBER_CDR (reg_usage);
8568
8569       request = Fcopy_sequence (args[coding_arg_iso2022_request]);
8570       for (tail = request; ! NILP (tail); tail = Fcdr (tail))
8571         {
8572           int id;
8573           Lisp_Object tmp;
8574
8575           val = Fcar (tail);
8576           CHECK_CONS (val);
8577           tmp = XCAR (val);
8578           CHECK_CHARSET_GET_ID (tmp, id);
8579           CHECK_NATNUM_CDR (val);
8580           if (XINT (XCDR (val)) >= 4)
8581             error ("Invalid graphic register number: %d", XINT (XCDR (val)));
8582           XSETCAR (val, make_number (id));
8583         }
8584
8585       flags = args[coding_arg_iso2022_flags];
8586       CHECK_NATNUM (flags);
8587       i = XINT (flags);
8588       if (EQ (args[coding_arg_charset_list], Qiso_2022))
8589         flags = make_number (i | CODING_ISO_FLAG_FULL_SUPPORT);
8590
8591       ASET (attrs, coding_attr_iso_initial, initial);
8592       ASET (attrs, coding_attr_iso_usage, reg_usage);
8593       ASET (attrs, coding_attr_iso_request, request);
8594       ASET (attrs, coding_attr_iso_flags, flags);
8595       setup_iso_safe_charsets (attrs);
8596
8597       if (i & CODING_ISO_FLAG_SEVEN_BITS)
8598         category = ((i & (CODING_ISO_FLAG_LOCKING_SHIFT
8599                           | CODING_ISO_FLAG_SINGLE_SHIFT))
8600                     ? coding_category_iso_7_else
8601                     : EQ (args[coding_arg_charset_list], Qiso_2022)
8602                     ? coding_category_iso_7
8603                     : coding_category_iso_7_tight);
8604       else
8605         {
8606           int id = XINT (AREF (initial, 1));
8607
8608           category = (((i & CODING_ISO_FLAG_LOCKING_SHIFT)
8609                        || EQ (args[coding_arg_charset_list], Qiso_2022)
8610                        || id < 0)
8611                       ? coding_category_iso_8_else
8612                       : (CHARSET_DIMENSION (CHARSET_FROM_ID (id)) == 1)
8613                       ? coding_category_iso_8_1
8614                       : coding_category_iso_8_2);
8615         }
8616       if (category != coding_category_iso_8_1
8617           && category != coding_category_iso_8_2)
8618         CODING_ATTR_ASCII_COMPAT (attrs) = Qnil;
8619     }
8620   else if (EQ (coding_type, Qemacs_mule))
8621     {
8622       if (EQ (args[coding_arg_charset_list], Qemacs_mule))
8623         ASET (attrs, coding_attr_emacs_mule_full, Qt);
8624       CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
8625       category = coding_category_emacs_mule;
8626     }
8627   else if (EQ (coding_type, Qshift_jis))
8628     {
8629
8630       struct charset *charset;
8631
8632       if (XINT (Flength (charset_list)) != 3
8633           && XINT (Flength (charset_list)) != 4)
8634         error ("There should be three or four charsets");
8635
8636       charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
8637       if (CHARSET_DIMENSION (charset) != 1)
8638         error ("Dimension of charset %s is not one",
8639                SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
8640       if (CHARSET_ASCII_COMPATIBLE_P (charset))
8641         CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
8642
8643       charset_list = XCDR (charset_list);
8644       charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
8645       if (CHARSET_DIMENSION (charset) != 1)
8646         error ("Dimension of charset %s is not one",
8647                SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
8648
8649       charset_list = XCDR (charset_list);
8650       charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
8651       if (CHARSET_DIMENSION (charset) != 2)
8652         error ("Dimension of charset %s is not two",
8653                SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
8654
8655       charset_list = XCDR (charset_list);
8656       if (! NILP (charset_list))
8657         {
8658           charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
8659           if (CHARSET_DIMENSION (charset) != 2)
8660             error ("Dimension of charset %s is not two",
8661                    SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
8662         }
8663
8664       category = coding_category_sjis;
8665       Vsjis_coding_system = name;
8666     }
8667   else if (EQ (coding_type, Qbig5))
8668     {
8669       struct charset *charset;
8670
8671       if (XINT (Flength (charset_list)) != 2)
8672         error ("There should be just two charsets");
8673
8674       charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
8675       if (CHARSET_DIMENSION (charset) != 1)
8676         error ("Dimension of charset %s is not one",
8677                SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
8678       if (CHARSET_ASCII_COMPATIBLE_P (charset))
8679         CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
8680
8681       charset_list = XCDR (charset_list);
8682       charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
8683       if (CHARSET_DIMENSION (charset) != 2)
8684         error ("Dimension of charset %s is not two",
8685                SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
8686
8687       category = coding_category_big5;
8688       Vbig5_coding_system = name;
8689     }
8690   else if (EQ (coding_type, Qraw_text))
8691     {
8692       category = coding_category_raw_text;
8693       CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
8694     }
8695   else if (EQ (coding_type, Qutf_8))
8696     {
8697       category = coding_category_utf_8;
8698       CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
8699     }
8700   else if (EQ (coding_type, Qundecided))
8701     category = coding_category_undecided;
8702   else
8703     error ("Invalid coding system type: %s",
8704            SDATA (SYMBOL_NAME (coding_type)));
8705
8706   CODING_ATTR_CATEGORY (attrs) = make_number (category);
8707   CODING_ATTR_PLIST (attrs)
8708     = Fcons (QCcategory, Fcons (AREF (Vcoding_category_table, category),
8709                                 CODING_ATTR_PLIST (attrs)));
8710
8711   eol_type = args[coding_arg_eol_type];
8712   if (! NILP (eol_type)
8713       && ! EQ (eol_type, Qunix)
8714       && ! EQ (eol_type, Qdos)
8715       && ! EQ (eol_type, Qmac))
8716     error ("Invalid eol-type");
8717
8718   aliases = Fcons (name, Qnil);
8719
8720   if (NILP (eol_type))
8721     {
8722       eol_type = make_subsidiaries (name);
8723       for (i = 0; i < 3; i++)
8724         {
8725           Lisp_Object this_spec, this_name, this_aliases, this_eol_type;
8726
8727           this_name = AREF (eol_type, i);
8728           this_aliases = Fcons (this_name, Qnil);
8729           this_eol_type = (i == 0 ? Qunix : i == 1 ? Qdos : Qmac);
8730           this_spec = Fmake_vector (make_number (3), attrs);
8731           ASET (this_spec, 1, this_aliases);
8732           ASET (this_spec, 2, this_eol_type);
8733           Fputhash (this_name, this_spec, Vcoding_system_hash_table);
8734           Vcoding_system_list = Fcons (this_name, Vcoding_system_list);
8735           Vcoding_system_alist = Fcons (Fcons (Fsymbol_name (this_name), Qnil),
8736                                         Vcoding_system_alist);
8737         }
8738     }
8739
8740   spec_vec = Fmake_vector (make_number (3), attrs);
8741   ASET (spec_vec, 1, aliases);
8742   ASET (spec_vec, 2, eol_type);
8743
8744   Fputhash (name, spec_vec, Vcoding_system_hash_table);
8745   Vcoding_system_list = Fcons (name, Vcoding_system_list);
8746   Vcoding_system_alist = Fcons (Fcons (Fsymbol_name (name), Qnil),
8747                                 Vcoding_system_alist);
8748
8749   {
8750     int id = coding_categories[category].id;
8751
8752     if (id < 0 || EQ (name, CODING_ID_NAME (id)))
8753       setup_coding_system (name, &coding_categories[category]);
8754   }
8755
8756   return Qnil;
8757
8758  short_args:
8759   return Fsignal (Qwrong_number_of_arguments,
8760                   Fcons (intern ("define-coding-system-internal"),
8761                          make_number (nargs)));
8762 }
8763
8764
8765 DEFUN ("coding-system-put", Fcoding_system_put, Scoding_system_put,
8766        3, 3, 0,
8767        doc: /* Change value in CODING-SYSTEM's property list PROP to VAL.  */)
8768   (coding_system, prop, val)
8769      Lisp_Object coding_system, prop, val;
8770 {
8771   Lisp_Object spec, attrs, plist;
8772
8773   CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
8774   attrs = AREF (spec, 0);
8775   if (EQ (prop, QCmnemonic))
8776     {
8777       if (! STRINGP (val))
8778         CHECK_CHARACTER (val);
8779       CODING_ATTR_MNEMONIC (attrs) = val;
8780     }
8781   else if (EQ (prop, QCdefalut_char))
8782     {
8783       if (NILP (val))
8784         val = make_number (' ');
8785       else
8786         CHECK_CHARACTER (val);
8787       CODING_ATTR_DEFAULT_CHAR (attrs) = val;
8788     }
8789   else if (EQ (prop, QCdecode_translation_table))
8790     {
8791       if (! CHAR_TABLE_P (val) && ! CONSP (val))
8792         CHECK_SYMBOL (val);
8793       CODING_ATTR_DECODE_TBL (attrs) = val;
8794     }
8795   else if (EQ (prop, QCencode_translation_table))
8796     {
8797       if (! CHAR_TABLE_P (val) && ! CONSP (val))
8798         CHECK_SYMBOL (val);
8799       CODING_ATTR_ENCODE_TBL (attrs) = val;
8800     }
8801   else if (EQ (prop, QCpost_read_conversion))
8802     {
8803       CHECK_SYMBOL (val);
8804       CODING_ATTR_POST_READ (attrs) = val;
8805     }
8806   else if (EQ (prop, QCpre_write_conversion))
8807     {
8808       CHECK_SYMBOL (val);
8809       CODING_ATTR_PRE_WRITE (attrs) = val;
8810     }
8811
8812   CODING_ATTR_PLIST (attrs)
8813     = Fplist_put (CODING_ATTR_PLIST (attrs), prop, val);
8814   return val;
8815 }
8816
8817
8818 DEFUN ("define-coding-system-alias", Fdefine_coding_system_alias,
8819        Sdefine_coding_system_alias, 2, 2, 0,
8820        doc: /* Define ALIAS as an alias for CODING-SYSTEM.  */)
8821      (alias, coding_system)
8822      Lisp_Object alias, coding_system;
8823 {
8824   Lisp_Object spec, aliases, eol_type;
8825
8826   CHECK_SYMBOL (alias);
8827   CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
8828   aliases = AREF (spec, 1);
8829   /* ALISES should be a list of length more than zero, and the first
8830      element is a base coding system.  Append ALIAS at the tail of the
8831      list.  */
8832   while (!NILP (XCDR (aliases)))
8833     aliases = XCDR (aliases);
8834   XSETCDR (aliases, Fcons (alias, Qnil));
8835
8836   eol_type = AREF (spec, 2);
8837   if (VECTORP (eol_type))
8838     {
8839       Lisp_Object subsidiaries;
8840       int i;
8841
8842       subsidiaries = make_subsidiaries (alias);
8843       for (i = 0; i < 3; i++)
8844         Fdefine_coding_system_alias (AREF (subsidiaries, i),
8845                                      AREF (eol_type, i));
8846     }
8847
8848   Fputhash (alias, spec, Vcoding_system_hash_table);
8849   Vcoding_system_list = Fcons (alias, Vcoding_system_list);
8850   Vcoding_system_alist = Fcons (Fcons (Fsymbol_name (alias), Qnil),
8851                                 Vcoding_system_alist);
8852
8853   return Qnil;
8854 }
8855
8856 DEFUN ("coding-system-base", Fcoding_system_base, Scoding_system_base,
8857        1, 1, 0,
8858        doc: /* Return the base of CODING-SYSTEM.
8859 Any alias or subsidiary coding system is not a base coding system.  */)
8860   (coding_system)
8861      Lisp_Object coding_system;
8862 {
8863   Lisp_Object spec, attrs;
8864
8865   if (NILP (coding_system))
8866     return (Qno_conversion);
8867   CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
8868   attrs = AREF (spec, 0);
8869   return CODING_ATTR_BASE_NAME (attrs);
8870 }
8871
8872 DEFUN ("coding-system-plist", Fcoding_system_plist, Scoding_system_plist,
8873        1, 1, 0,
8874        doc: "Return the property list of CODING-SYSTEM.")
8875      (coding_system)
8876      Lisp_Object coding_system;
8877 {
8878   Lisp_Object spec, attrs;
8879
8880   if (NILP (coding_system))
8881     coding_system = Qno_conversion;
8882   CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
8883   attrs = AREF (spec, 0);
8884   return CODING_ATTR_PLIST (attrs);
8885 }
8886
8887
8888 DEFUN ("coding-system-aliases", Fcoding_system_aliases, Scoding_system_aliases,
8889        1, 1, 0,
8890        doc: /* Return the list of aliases of CODING-SYSTEM.  */)
8891      (coding_system)
8892      Lisp_Object coding_system;
8893 {
8894   Lisp_Object spec;
8895
8896   if (NILP (coding_system))
8897     coding_system = Qno_conversion;
8898   CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
8899   return AREF (spec, 1);
8900 }
8901
8902 DEFUN ("coding-system-eol-type", Fcoding_system_eol_type,
8903        Scoding_system_eol_type, 1, 1, 0,
8904        doc: /* Return eol-type of CODING-SYSTEM.
8905 An eol-type is integer 0, 1, 2, or a vector of coding systems.
8906
8907 Integer values 0, 1, and 2 indicate a format of end-of-line; LF, CRLF,
8908 and CR respectively.
8909
8910 A vector value indicates that a format of end-of-line should be
8911 detected automatically.  Nth element of the vector is the subsidiary
8912 coding system whose eol-type is N.  */)
8913      (coding_system)
8914      Lisp_Object coding_system;
8915 {
8916   Lisp_Object spec, eol_type;
8917   int n;
8918
8919   if (NILP (coding_system))
8920     coding_system = Qno_conversion;
8921   if (! CODING_SYSTEM_P (coding_system))
8922     return Qnil;
8923   spec = CODING_SYSTEM_SPEC (coding_system);
8924   eol_type = AREF (spec, 2);
8925   if (VECTORP (eol_type))
8926     return Fcopy_sequence (eol_type);
8927   n = EQ (eol_type, Qunix) ? 0 : EQ (eol_type, Qdos) ? 1 : 2;
8928   return make_number (n);
8929 }
8930
8931 #endif /* emacs */
8932
8933 \f
8934 /*** 9. Post-amble ***/
8935
8936 void
8937 init_coding_once ()
8938 {
8939   int i;
8940
8941   for (i = 0; i < coding_category_max; i++)
8942     {
8943       coding_categories[i].id = -1;
8944       coding_priorities[i] = i;
8945     }
8946
8947   /* ISO2022 specific initialize routine.  */
8948   for (i = 0; i < 0x20; i++)
8949     iso_code_class[i] = ISO_control_0;
8950   for (i = 0x21; i < 0x7F; i++)
8951     iso_code_class[i] = ISO_graphic_plane_0;
8952   for (i = 0x80; i < 0xA0; i++)
8953     iso_code_class[i] = ISO_control_1;
8954   for (i = 0xA1; i < 0xFF; i++)
8955     iso_code_class[i] = ISO_graphic_plane_1;
8956   iso_code_class[0x20] = iso_code_class[0x7F] = ISO_0x20_or_0x7F;
8957   iso_code_class[0xA0] = iso_code_class[0xFF] = ISO_0xA0_or_0xFF;
8958   iso_code_class[ISO_CODE_SO] = ISO_shift_out;
8959   iso_code_class[ISO_CODE_SI] = ISO_shift_in;
8960   iso_code_class[ISO_CODE_SS2_7] = ISO_single_shift_2_7;
8961   iso_code_class[ISO_CODE_ESC] = ISO_escape;
8962   iso_code_class[ISO_CODE_SS2] = ISO_single_shift_2;
8963   iso_code_class[ISO_CODE_SS3] = ISO_single_shift_3;
8964   iso_code_class[ISO_CODE_CSI] = ISO_control_sequence_introducer;
8965
8966   for (i = 0; i < 256; i++)
8967     {
8968       emacs_mule_bytes[i] = 1;
8969     }
8970   emacs_mule_bytes[EMACS_MULE_LEADING_CODE_PRIVATE_11] = 3;
8971   emacs_mule_bytes[EMACS_MULE_LEADING_CODE_PRIVATE_12] = 3;
8972   emacs_mule_bytes[EMACS_MULE_LEADING_CODE_PRIVATE_21] = 4;
8973   emacs_mule_bytes[EMACS_MULE_LEADING_CODE_PRIVATE_22] = 4;
8974 }
8975
8976 #ifdef emacs
8977
8978 void
8979 syms_of_coding ()
8980 {
8981   staticpro (&Vcoding_system_hash_table);
8982   {
8983     Lisp_Object args[2];
8984     args[0] = QCtest;
8985     args[1] = Qeq;
8986     Vcoding_system_hash_table = Fmake_hash_table (2, args);
8987   }
8988
8989   staticpro (&Vsjis_coding_system);
8990   Vsjis_coding_system = Qnil;
8991
8992   staticpro (&Vbig5_coding_system);
8993   Vbig5_coding_system = Qnil;
8994
8995   staticpro (&Vcode_conversion_reused_workbuf);
8996   Vcode_conversion_reused_workbuf = Qnil;
8997
8998   staticpro (&Vcode_conversion_workbuf_name);
8999   Vcode_conversion_workbuf_name = build_string (" *code-conversion-work*");
9000
9001   reused_workbuf_in_use = 0;
9002
9003   DEFSYM (Qcharset, "charset");
9004   DEFSYM (Qtarget_idx, "target-idx");
9005   DEFSYM (Qcoding_system_history, "coding-system-history");
9006   Fset (Qcoding_system_history, Qnil);
9007
9008   /* Target FILENAME is the first argument.  */
9009   Fput (Qinsert_file_contents, Qtarget_idx, make_number (0));
9010   /* Target FILENAME is the third argument.  */
9011   Fput (Qwrite_region, Qtarget_idx, make_number (2));
9012
9013   DEFSYM (Qcall_process, "call-process");
9014   /* Target PROGRAM is the first argument.  */
9015   Fput (Qcall_process, Qtarget_idx, make_number (0));
9016
9017   DEFSYM (Qcall_process_region, "call-process-region");
9018   /* Target PROGRAM is the third argument.  */
9019   Fput (Qcall_process_region, Qtarget_idx, make_number (2));
9020
9021   DEFSYM (Qstart_process, "start-process");
9022   /* Target PROGRAM is the third argument.  */
9023   Fput (Qstart_process, Qtarget_idx, make_number (2));
9024
9025   DEFSYM (Qopen_network_stream, "open-network-stream");
9026   /* Target SERVICE is the fourth argument.  */
9027   Fput (Qopen_network_stream, Qtarget_idx, make_number (3));
9028
9029   DEFSYM (Qcoding_system, "coding-system");
9030   DEFSYM (Qcoding_aliases, "coding-aliases");
9031
9032   DEFSYM (Qeol_type, "eol-type");
9033   DEFSYM (Qunix, "unix");
9034   DEFSYM (Qdos, "dos");
9035
9036   DEFSYM (Qbuffer_file_coding_system, "buffer-file-coding-system");
9037   DEFSYM (Qpost_read_conversion, "post-read-conversion");
9038   DEFSYM (Qpre_write_conversion, "pre-write-conversion");
9039   DEFSYM (Qdefault_char, "default-char");
9040   DEFSYM (Qundecided, "undecided");
9041   DEFSYM (Qno_conversion, "no-conversion");
9042   DEFSYM (Qraw_text, "raw-text");
9043
9044   DEFSYM (Qiso_2022, "iso-2022");
9045
9046   DEFSYM (Qutf_8, "utf-8");
9047   DEFSYM (Qutf_8_emacs, "utf-8-emacs");
9048
9049   DEFSYM (Qutf_16, "utf-16");
9050   DEFSYM (Qbig, "big");
9051   DEFSYM (Qlittle, "little");
9052
9053   DEFSYM (Qshift_jis, "shift-jis");
9054   DEFSYM (Qbig5, "big5");
9055
9056   DEFSYM (Qcoding_system_p, "coding-system-p");
9057
9058   DEFSYM (Qcoding_system_error, "coding-system-error");
9059   Fput (Qcoding_system_error, Qerror_conditions,
9060         Fcons (Qcoding_system_error, Fcons (Qerror, Qnil)));
9061   Fput (Qcoding_system_error, Qerror_message,
9062         build_string ("Invalid coding system"));
9063
9064   /* Intern this now in case it isn't already done.
9065      Setting this variable twice is harmless.
9066      But don't staticpro it here--that is done in alloc.c.  */
9067   Qchar_table_extra_slots = intern ("char-table-extra-slots");
9068
9069   DEFSYM (Qtranslation_table, "translation-table");
9070   Fput (Qtranslation_table, Qchar_table_extra_slots, make_number (2));
9071   DEFSYM (Qtranslation_table_id, "translation-table-id");
9072   DEFSYM (Qtranslation_table_for_decode, "translation-table-for-decode");
9073   DEFSYM (Qtranslation_table_for_encode, "translation-table-for-encode");
9074
9075   DEFSYM (Qvalid_codes, "valid-codes");
9076
9077   DEFSYM (Qemacs_mule, "emacs-mule");
9078
9079   DEFSYM (QCcategory, ":category");
9080   DEFSYM (QCmnemonic, ":mnemonic");
9081   DEFSYM (QCdefalut_char, ":default-char");
9082   DEFSYM (QCdecode_translation_table, ":decode-translation-table");
9083   DEFSYM (QCencode_translation_table, ":encode-translation-table");
9084   DEFSYM (QCpost_read_conversion, ":post-read-conversion");
9085   DEFSYM (QCpre_write_conversion, ":pre-write-conversion");
9086
9087   Vcoding_category_table
9088     = Fmake_vector (make_number (coding_category_max), Qnil);
9089   staticpro (&Vcoding_category_table);
9090   /* Followings are target of code detection.  */
9091   ASET (Vcoding_category_table, coding_category_iso_7,
9092         intern ("coding-category-iso-7"));
9093   ASET (Vcoding_category_table, coding_category_iso_7_tight,
9094         intern ("coding-category-iso-7-tight"));
9095   ASET (Vcoding_category_table, coding_category_iso_8_1,
9096         intern ("coding-category-iso-8-1"));
9097   ASET (Vcoding_category_table, coding_category_iso_8_2,
9098         intern ("coding-category-iso-8-2"));
9099   ASET (Vcoding_category_table, coding_category_iso_7_else,
9100         intern ("coding-category-iso-7-else"));
9101   ASET (Vcoding_category_table, coding_category_iso_8_else,
9102         intern ("coding-category-iso-8-else"));
9103   ASET (Vcoding_category_table, coding_category_utf_8,
9104         intern ("coding-category-utf-8"));
9105   ASET (Vcoding_category_table, coding_category_utf_16_be,
9106         intern ("coding-category-utf-16-be"));
9107   ASET (Vcoding_category_table, coding_category_utf_16_auto,
9108         intern ("coding-category-utf-16-auto"));
9109   ASET (Vcoding_category_table, coding_category_utf_16_le,
9110         intern ("coding-category-utf-16-le"));
9111   ASET (Vcoding_category_table, coding_category_utf_16_be_nosig,
9112         intern ("coding-category-utf-16-be-nosig"));
9113   ASET (Vcoding_category_table, coding_category_utf_16_le_nosig,
9114         intern ("coding-category-utf-16-le-nosig"));
9115   ASET (Vcoding_category_table, coding_category_charset,
9116         intern ("coding-category-charset"));
9117   ASET (Vcoding_category_table, coding_category_sjis,
9118         intern ("coding-category-sjis"));
9119   ASET (Vcoding_category_table, coding_category_big5,
9120         intern ("coding-category-big5"));
9121   ASET (Vcoding_category_table, coding_category_ccl,
9122         intern ("coding-category-ccl"));
9123   ASET (Vcoding_category_table, coding_category_emacs_mule,
9124         intern ("coding-category-emacs-mule"));
9125   /* Followings are NOT target of code detection.  */
9126   ASET (Vcoding_category_table, coding_category_raw_text,
9127         intern ("coding-category-raw-text"));
9128   ASET (Vcoding_category_table, coding_category_undecided,
9129         intern ("coding-category-undecided"));
9130
9131   DEFSYM (Qinsufficient_source, "insufficient-source");
9132   DEFSYM (Qinconsistent_eol, "inconsistent-eol");
9133   DEFSYM (Qinvalid_source, "invalid-source");
9134   DEFSYM (Qinterrupted, "interrupted");
9135   DEFSYM (Qinsufficient_memory, "insufficient-memory");
9136
9137   defsubr (&Scoding_system_p);
9138   defsubr (&Sread_coding_system);
9139   defsubr (&Sread_non_nil_coding_system);
9140   defsubr (&Scheck_coding_system);
9141   defsubr (&Sdetect_coding_region);
9142   defsubr (&Sdetect_coding_string);
9143   defsubr (&Sfind_coding_systems_region_internal);
9144   defsubr (&Sunencodable_char_position);
9145   defsubr (&Scheck_coding_systems_region);
9146   defsubr (&Sdecode_coding_region);
9147   defsubr (&Sencode_coding_region);
9148   defsubr (&Sdecode_coding_string);
9149   defsubr (&Sencode_coding_string);
9150   defsubr (&Sdecode_sjis_char);
9151   defsubr (&Sencode_sjis_char);
9152   defsubr (&Sdecode_big5_char);
9153   defsubr (&Sencode_big5_char);
9154   defsubr (&Sset_terminal_coding_system_internal);
9155   defsubr (&Sset_safe_terminal_coding_system_internal);
9156   defsubr (&Sterminal_coding_system);
9157   defsubr (&Sset_keyboard_coding_system_internal);
9158   defsubr (&Skeyboard_coding_system);
9159   defsubr (&Sfind_operation_coding_system);
9160   defsubr (&Sset_coding_system_priority);
9161   defsubr (&Sdefine_coding_system_internal);
9162   defsubr (&Sdefine_coding_system_alias);
9163   defsubr (&Scoding_system_put);
9164   defsubr (&Scoding_system_base);
9165   defsubr (&Scoding_system_plist);
9166   defsubr (&Scoding_system_aliases);
9167   defsubr (&Scoding_system_eol_type);
9168   defsubr (&Scoding_system_priority_list);
9169
9170   DEFVAR_LISP ("coding-system-list", &Vcoding_system_list,
9171                doc: /* List of coding systems.
9172
9173 Do not alter the value of this variable manually.  This variable should be
9174 updated by the functions `define-coding-system' and
9175 `define-coding-system-alias'.  */);
9176   Vcoding_system_list = Qnil;
9177
9178   DEFVAR_LISP ("coding-system-alist", &Vcoding_system_alist,
9179                doc: /* Alist of coding system names.
9180 Each element is one element list of coding system name.
9181 This variable is given to `completing-read' as TABLE argument.
9182
9183 Do not alter the value of this variable manually.  This variable should be
9184 updated by the functions `make-coding-system' and
9185 `define-coding-system-alias'.  */);
9186   Vcoding_system_alist = Qnil;
9187
9188   DEFVAR_LISP ("coding-category-list", &Vcoding_category_list,
9189                doc: /* List of coding-categories (symbols) ordered by priority.
9190
9191 On detecting a coding system, Emacs tries code detection algorithms
9192 associated with each coding-category one by one in this order.  When
9193 one algorithm agrees with a byte sequence of source text, the coding
9194 system bound to the corresponding coding-category is selected.  */);
9195   {
9196     int i;
9197
9198     Vcoding_category_list = Qnil;
9199     for (i = coding_category_max - 1; i >= 0; i--)
9200       Vcoding_category_list
9201         = Fcons (XVECTOR (Vcoding_category_table)->contents[i],
9202                  Vcoding_category_list);
9203   }
9204
9205   DEFVAR_LISP ("coding-system-for-read", &Vcoding_system_for_read,
9206                doc: /* Specify the coding system for read operations.
9207 It is useful to bind this variable with `let', but do not set it globally.
9208 If the value is a coding system, it is used for decoding on read operation.
9209 If not, an appropriate element is used from one of the coding system alists:
9210 There are three such tables, `file-coding-system-alist',
9211 `process-coding-system-alist', and `network-coding-system-alist'.  */);
9212   Vcoding_system_for_read = Qnil;
9213
9214   DEFVAR_LISP ("coding-system-for-write", &Vcoding_system_for_write,
9215                doc: /* Specify the coding system for write operations.
9216 Programs bind this variable with `let', but you should not set it globally.
9217 If the value is a coding system, it is used for encoding of output,
9218 when writing it to a file and when sending it to a file or subprocess.
9219
9220 If this does not specify a coding system, an appropriate element
9221 is used from one of the coding system alists:
9222 There are three such tables, `file-coding-system-alist',
9223 `process-coding-system-alist', and `network-coding-system-alist'.
9224 For output to files, if the above procedure does not specify a coding system,
9225 the value of `buffer-file-coding-system' is used.  */);
9226   Vcoding_system_for_write = Qnil;
9227
9228   DEFVAR_LISP ("last-coding-system-used", &Vlast_coding_system_used,
9229                doc: /*
9230 Coding system used in the latest file or process I/O.  */);
9231   Vlast_coding_system_used = Qnil;
9232
9233   DEFVAR_LISP ("last-code-conversion-error", &Vlast_code_conversion_error,
9234                doc: /*
9235 Error status of the last code conversion.
9236
9237 When an error was detected in the last code conversion, this variable
9238 is set to one of the following symbols.
9239   `insufficient-source'
9240   `inconsistent-eol'
9241   `invalid-source'
9242   `interrupted'
9243   `insufficient-memory'
9244 When no error was detected, the value doesn't change.  So, to check
9245 the error status of a code conversion by this variable, you must
9246 explicitly set this variable to nil before performing code
9247 conversion.  */);
9248   Vlast_code_conversion_error = Qnil;
9249
9250   DEFVAR_BOOL ("inhibit-eol-conversion", &inhibit_eol_conversion,
9251                doc: /*
9252 *Non-nil means always inhibit code conversion of end-of-line format.
9253 See info node `Coding Systems' and info node `Text and Binary' concerning
9254 such conversion.  */);
9255   inhibit_eol_conversion = 0;
9256
9257   DEFVAR_BOOL ("inherit-process-coding-system", &inherit_process_coding_system,
9258                doc: /*
9259 Non-nil means process buffer inherits coding system of process output.
9260 Bind it to t if the process output is to be treated as if it were a file
9261 read from some filesystem.  */);
9262   inherit_process_coding_system = 0;
9263
9264   DEFVAR_LISP ("file-coding-system-alist", &Vfile_coding_system_alist,
9265                doc: /*
9266 Alist to decide a coding system to use for a file I/O operation.
9267 The format is ((PATTERN . VAL) ...),
9268 where PATTERN is a regular expression matching a file name,
9269 VAL is a coding system, a cons of coding systems, or a function symbol.
9270 If VAL is a coding system, it is used for both decoding and encoding
9271 the file contents.
9272 If VAL is a cons of coding systems, the car part is used for decoding,
9273 and the cdr part is used for encoding.
9274 If VAL is a function symbol, the function must return a coding system
9275 or a cons of coding systems which are used as above.  The function gets
9276 the arguments with which `find-operation-coding-systems' was called.
9277
9278 See also the function `find-operation-coding-system'
9279 and the variable `auto-coding-alist'.  */);
9280   Vfile_coding_system_alist = Qnil;
9281
9282   DEFVAR_LISP ("process-coding-system-alist", &Vprocess_coding_system_alist,
9283                doc: /*
9284 Alist to decide a coding system to use for a process I/O operation.
9285 The format is ((PATTERN . VAL) ...),
9286 where PATTERN is a regular expression matching a program name,
9287 VAL is a coding system, a cons of coding systems, or a function symbol.
9288 If VAL is a coding system, it is used for both decoding what received
9289 from the program and encoding what sent to the program.
9290 If VAL is a cons of coding systems, the car part is used for decoding,
9291 and the cdr part is used for encoding.
9292 If VAL is a function symbol, the function must return a coding system
9293 or a cons of coding systems which are used as above.
9294
9295 See also the function `find-operation-coding-system'.  */);
9296   Vprocess_coding_system_alist = Qnil;
9297
9298   DEFVAR_LISP ("network-coding-system-alist", &Vnetwork_coding_system_alist,
9299                doc: /*
9300 Alist to decide a coding system to use for a network I/O operation.
9301 The format is ((PATTERN . VAL) ...),
9302 where PATTERN is a regular expression matching a network service name
9303 or is a port number to connect to,
9304 VAL is a coding system, a cons of coding systems, or a function symbol.
9305 If VAL is a coding system, it is used for both decoding what received
9306 from the network stream and encoding what sent to the network stream.
9307 If VAL is a cons of coding systems, the car part is used for decoding,
9308 and the cdr part is used for encoding.
9309 If VAL is a function symbol, the function must return a coding system
9310 or a cons of coding systems which are used as above.
9311
9312 See also the function `find-operation-coding-system'.  */);
9313   Vnetwork_coding_system_alist = Qnil;
9314
9315   DEFVAR_LISP ("locale-coding-system", &Vlocale_coding_system,
9316                doc: /* Coding system to use with system messages.
9317 Also used for decoding keyboard input on X Window system.  */);
9318   Vlocale_coding_system = Qnil;
9319
9320   /* The eol mnemonics are reset in startup.el system-dependently.  */
9321   DEFVAR_LISP ("eol-mnemonic-unix", &eol_mnemonic_unix,
9322                doc: /*
9323 *String displayed in mode line for UNIX-like (LF) end-of-line format.  */);
9324   eol_mnemonic_unix = build_string (":");
9325
9326   DEFVAR_LISP ("eol-mnemonic-dos", &eol_mnemonic_dos,
9327                doc: /*
9328 *String displayed in mode line for DOS-like (CRLF) end-of-line format.  */);
9329   eol_mnemonic_dos = build_string ("\\");
9330
9331   DEFVAR_LISP ("eol-mnemonic-mac", &eol_mnemonic_mac,
9332                doc: /*
9333 *String displayed in mode line for MAC-like (CR) end-of-line format.  */);
9334   eol_mnemonic_mac = build_string ("/");
9335
9336   DEFVAR_LISP ("eol-mnemonic-undecided", &eol_mnemonic_undecided,
9337                doc: /*
9338 *String displayed in mode line when end-of-line format is not yet determined.  */);
9339   eol_mnemonic_undecided = build_string (":");
9340
9341   DEFVAR_LISP ("enable-character-translation", &Venable_character_translation,
9342                doc: /*
9343 *Non-nil enables character translation while encoding and decoding.  */);
9344   Venable_character_translation = Qt;
9345
9346   DEFVAR_LISP ("standard-translation-table-for-decode",
9347                &Vstandard_translation_table_for_decode,
9348                doc: /* Table for translating characters while decoding.  */);
9349   Vstandard_translation_table_for_decode = Qnil;
9350
9351   DEFVAR_LISP ("standard-translation-table-for-encode",
9352                &Vstandard_translation_table_for_encode,
9353                doc: /* Table for translating characters while encoding.  */);
9354   Vstandard_translation_table_for_encode = Qnil;
9355
9356   DEFVAR_LISP ("charset-revision-table", &Vcharset_revision_table,
9357                doc: /* Alist of charsets vs revision numbers.
9358 While encoding, if a charset (car part of an element) is found,
9359 designate it with the escape sequence identifying revision (cdr part
9360 of the element).  */);
9361   Vcharset_revision_table = Qnil;
9362
9363   DEFVAR_LISP ("default-process-coding-system",
9364                &Vdefault_process_coding_system,
9365                doc: /* Cons of coding systems used for process I/O by default.
9366 The car part is used for decoding a process output,
9367 the cdr part is used for encoding a text to be sent to a process.  */);
9368   Vdefault_process_coding_system = Qnil;
9369
9370   DEFVAR_LISP ("latin-extra-code-table", &Vlatin_extra_code_table,
9371                doc: /*
9372 Table of extra Latin codes in the range 128..159 (inclusive).
9373 This is a vector of length 256.
9374 If Nth element is non-nil, the existence of code N in a file
9375 \(or output of subprocess) doesn't prevent it to be detected as
9376 a coding system of ISO 2022 variant which has a flag
9377 `accept-latin-extra-code' t (e.g. iso-latin-1) on reading a file
9378 or reading output of a subprocess.
9379 Only 128th through 159th elements has a meaning.  */);
9380   Vlatin_extra_code_table = Fmake_vector (make_number (256), Qnil);
9381
9382   DEFVAR_LISP ("select-safe-coding-system-function",
9383                &Vselect_safe_coding_system_function,
9384                doc: /*
9385 Function to call to select safe coding system for encoding a text.
9386
9387 If set, this function is called to force a user to select a proper
9388 coding system which can encode the text in the case that a default
9389 coding system used in each operation can't encode the text.
9390
9391 The default value is `select-safe-coding-system' (which see).  */);
9392   Vselect_safe_coding_system_function = Qnil;
9393
9394   DEFVAR_BOOL ("coding-system-require-warning",
9395                &coding_system_require_warning,
9396                doc: /* Internal use only.
9397 If non-nil, on writing a file, `select-safe-coding-system-function' is
9398 called even if `coding-system-for-write' is non-nil.  The command
9399 `universal-coding-system-argument' binds this variable to t temporarily.  */);
9400   coding_system_require_warning = 0;
9401
9402
9403   DEFVAR_BOOL ("inhibit-iso-escape-detection",
9404                &inhibit_iso_escape_detection,
9405                doc: /*
9406 If non-nil, Emacs ignores ISO2022's escape sequence on code detection.
9407
9408 By default, on reading a file, Emacs tries to detect how the text is
9409 encoded.  This code detection is sensitive to escape sequences.  If
9410 the sequence is valid as ISO2022, the code is determined as one of
9411 the ISO2022 encodings, and the file is decoded by the corresponding
9412 coding system (e.g. `iso-2022-7bit').
9413
9414 However, there may be a case that you want to read escape sequences in
9415 a file as is.  In such a case, you can set this variable to non-nil.
9416 Then, as the code detection ignores any escape sequences, no file is
9417 detected as encoded in some ISO2022 encoding.  The result is that all
9418 escape sequences become visible in a buffer.
9419
9420 The default value is nil, and it is strongly recommended not to change
9421 it.  That is because many Emacs Lisp source files that contain
9422 non-ASCII characters are encoded by the coding system `iso-2022-7bit'
9423 in Emacs's distribution, and they won't be decoded correctly on
9424 reading if you suppress escape sequence detection.
9425
9426 The other way to read escape sequences in a file without decoding is
9427 to explicitly specify some coding system that doesn't use ISO2022's
9428 escape sequence (e.g `latin-1') on reading by \\[universal-coding-system-argument].  */);
9429   inhibit_iso_escape_detection = 0;
9430
9431   DEFVAR_LISP ("translation-table-for-input", &Vtranslation_table_for_input,
9432                doc: /* Char table for translating self-inserting characters.
9433 This is applied to the result of input methods, not their input.  See also
9434 `keyboard-translate-table'.  */);
9435     Vtranslation_table_for_input = Qnil;
9436
9437   {
9438     Lisp_Object args[coding_arg_max];
9439     Lisp_Object plist[16];
9440     int i;
9441
9442     for (i = 0; i < coding_arg_max; i++)
9443       args[i] = Qnil;
9444
9445     plist[0] = intern (":name");
9446     plist[1] = args[coding_arg_name] = Qno_conversion;
9447     plist[2] = intern (":mnemonic");
9448     plist[3] = args[coding_arg_mnemonic] = make_number ('=');
9449     plist[4] = intern (":coding-type");
9450     plist[5] = args[coding_arg_coding_type] = Qraw_text;
9451     plist[6] = intern (":ascii-compatible-p");
9452     plist[7] = args[coding_arg_ascii_compatible_p] = Qt;
9453     plist[8] = intern (":default-char");
9454     plist[9] = args[coding_arg_default_char] = make_number (0);
9455     plist[10] = intern (":for-unibyte");
9456     plist[11] = args[coding_arg_for_unibyte] = Qt;
9457     plist[12] = intern (":docstring");
9458     plist[13] = build_string ("Do no conversion.\n\
9459 \n\
9460 When you visit a file with this coding, the file is read into a\n\
9461 unibyte buffer as is, thus each byte of a file is treated as a\n\
9462 character.");
9463     plist[14] = intern (":eol-type");
9464     plist[15] = args[coding_arg_eol_type] = Qunix;
9465     args[coding_arg_plist] = Flist (16, plist);
9466     Fdefine_coding_system_internal (coding_arg_max, args);
9467   }
9468
9469   setup_coding_system (Qno_conversion, &keyboard_coding);
9470   setup_coding_system (Qno_conversion, &terminal_coding);
9471   setup_coding_system (Qno_conversion, &safe_terminal_coding);
9472
9473   {
9474     int i;
9475
9476     for (i = 0; i < coding_category_max; i++)
9477       Fset (AREF (Vcoding_category_table, i), Qno_conversion);
9478   }
9479 }
9480
9481 char *
9482 emacs_strerror (error_number)
9483      int error_number;
9484 {
9485   char *str;
9486
9487   synchronize_system_messages_locale ();
9488   str = strerror (error_number);
9489
9490   if (! NILP (Vlocale_coding_system))
9491     {
9492       Lisp_Object dec = code_convert_string_norecord (build_string (str),
9493                                                       Vlocale_coding_system,
9494                                                       0);
9495       str = (char *) SDATA (dec);
9496     }
9497
9498   return str;
9499 }
9500
9501 #endif /* emacs */