src/coding.c

   1 /* Coding system handler (conversion, detection, etc).
   2    Copyright (C) 1995, 1997, 1998 Electrotechnical Laboratory, JAPAN.
   3      Licensed to the Free Software Foundation.
   4    Copyright (C) 2001, 2002 Free Software Foundation, Inc.
   5    Copyright (C) 2003
   6      National Institute of Advanced Industrial Science and Technology (AIST)
   7      Registration Number H13PRO009
   8
   9 This file is part of GNU Emacs.
  10
  11 GNU Emacs is free software; you can redistribute it and/or modify
  12 it under the terms of the GNU General Public License as published by
  13 the Free Software Foundation; either version 2, or (at your option)
  14 any later version.
  15
  16 GNU Emacs is distributed in the hope that it will be useful,
  17 but WITHOUT ANY WARRANTY; without even the implied warranty of
  18 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  19 GNU General Public License for more details.
  20
  21 You should have received a copy of the GNU General Public License
  22 along with GNU Emacs; see the file COPYING.  If not, write to
  23 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
  24 Boston, MA 02111-1307, USA.  */
  25
  26 /*** TABLE OF CONTENTS ***
  27
  28   0. General comments
  29   1. Preamble
  30   2. Emacs' internal format (emacs-utf-8) handlers
  31   3. UTF-8 handlers
  32   4. UTF-16 handlers
  33   5. Charset-base coding systems handlers
  34   6. emacs-mule (old Emacs' internal format) handlers
  35   7. ISO2022 handlers
  36   8. Shift-JIS and BIG5 handlers
  37   9. CCL handlers
  38   10. C library functions
  39   11. Emacs Lisp library functions
  40   12. Postamble
  41
  42 */
  43
  44 /*** 0. General comments ***
  45
  46
  47 CODING SYSTEM
  48
  49   A coding system is an object for an encoding mechanism that contains
  50   information about how to convert byte sequences to character
  51   sequences and vice versa.  When we say "decode", it means converting
  52   a byte sequence of a specific coding system into a character
  53   sequence that is represented by Emacs' internal coding system
  54   `emacs-utf-8', and when we say "encode", it means converting a
  55   character sequence of emacs-utf-8 to a byte sequence of a specific
  56   coding system.
  57
  58   In Emacs Lisp, a coding system is represented by a Lisp symbol.  In
  59   C level, a coding system is represented by a vector of attributes
  60   stored in the hash table Vcharset_hash_table.  The conversion from
  61   coding system symbol to attributes vector is done by looking up
  62   Vcharset_hash_table by the symbol.
  63
  64   Coding systems are classified into the following types depending on
  65   the encoding mechanism.  Here's a brief description of the types.
  66
  67   o UTF-8
  68
  69   o UTF-16
  70
  71   o Charset-base coding system
  72
  73   A coding system defined by one or more (coded) character sets.
  74   Decoding and encoding are done by a code converter defined for each
  75   character set.
  76
  77   o Old Emacs internal format (emacs-mule)
  78
  79   The coding system adopted by old versions of Emacs (20 and 21).
  80
  81   o ISO2022-base coding system
  82
  83   The most famous coding system for multiple character sets.  X's
  84   Compound Text, various EUCs (Extended Unix Code), and coding systems
  85   used in the Internet communication such as ISO-2022-JP are all
  86   variants of ISO2022.
  87
  88   o SJIS (or Shift-JIS or MS-Kanji-Code)
  89
  90   A coding system to encode character sets: ASCII, JISX0201, and
  91   JISX0208.  Widely used for PC's in Japan.  Details are described in
  92   section 8.
  93
  94   o BIG5
  95
  96   A coding system to encode character sets: ASCII and Big5.  Widely
  97   used for Chinese (mainly in Taiwan and Hong Kong).  Details are
  98   described in section 8.  In this file, when we write "big5" (all
  99   lowercase), we mean the coding system, and when we write "Big5"
 100   (capitalized), we mean the character set.
 101
 102   o CCL
 103
 104   If a user wants to decode/encode text encoded in a coding system
 105   not listed above, he can supply a decoder and an encoder for it in
 106   CCL (Code Conversion Language) programs.  Emacs executes the CCL
 107   program while decoding/encoding.
 108
 109   o Raw-text
 110
 111   A coding system for text containing raw eight-bit data.  Emacs
 112   treats each byte of source text as a character (except for
 113   end-of-line conversion).
 114
 115   o No-conversion
 116
 117   Like raw text, but don't do end-of-line conversion.
 118
 119
 120 END-OF-LINE FORMAT
 121
 122   How text end-of-line is encoded depends on operating system.  For
 123   instance, Unix's format is just one byte of LF (line-feed) code,
 124   whereas DOS's format is two-byte sequence of `carriage-return' and
 125   `line-feed' codes.  MacOS's format is usually one byte of
 126   `carriage-return'.
 127
 128   Since text character encoding and end-of-line encoding are
 129   independent, any coding system described above can take any format
 130   of end-of-line (except for no-conversion).
 131
 132 STRUCT CODING_SYSTEM
 133
 134   Before using a coding system for code conversion (i.e. decoding and
 135   encoding), we setup a structure of type `struct coding_system'.
 136   This structure keeps various information about a specific code
 137   conversion (e.g. the location of source and destination data).
 138
 139 */
 140
 141 /* COMMON MACROS */
 142
 143
 144 /*** GENERAL NOTES on `detect_coding_XXX ()' functions ***
 145
 146   These functions check if a byte sequence specified as a source in
 147   CODING conforms to the format of XXX, and update the members of
 148   DETECT_INFO.
 149
 150   Return 1 if the byte sequence conforms to XXX, otherwise return 0.
 151
 152   Below is the template of these functions.  */
 153
 154 #if 0
 155 static int
 156 detect_coding_XXX (coding, detect_info)
 157      struct coding_system *coding;
 158      struct coding_detection_info *detect_info;
 159 {
 160   unsigned char *src = coding->source;
 161   unsigned char *src_end = coding->source + coding->src_bytes;
 162   int multibytep = coding->src_multibyte;
 163   int consumed_chars = 0;
 164   int found = 0;
 165   ...;
 166
 167   while (1)
 168     {
 169       /* Get one byte from the source.  If the souce is exausted, jump
 170          to no_more_source:.  */
 171       ONE_MORE_BYTE (c);
 172
 173       if (! __C_conforms_to_XXX___ (c))
 174         break;
 175       if (! __C_strongly_suggests_XXX__ (c))
 176         found = CATEGORY_MASK_XXX;
 177     }
 178   /* The byte sequence is invalid for XXX.  */
 179   detect_info->rejected |= CATEGORY_MASK_XXX;
 180   return 0;
 181
 182  no_more_source:
 183   /* The source exausted successfully.  */
 184   detect_info->found |= found;
 185   return 1;
 186 }
 187 #endif
 188
 189 /*** GENERAL NOTES on `decode_coding_XXX ()' functions ***
 190
 191   These functions decode a byte sequence specified as a source by
 192   CODING.  The resulting multibyte text goes to a place pointed to by
 193   CODING->charbuf, the length of which should not exceed
 194   CODING->charbuf_size;
 195
 196   These functions set the information of original and decoded texts in
 197   CODING->consumed, CODING->consumed_char, and CODING->charbuf_used.
 198   They also set CODING->result to one of CODING_RESULT_XXX indicating
 199   how the decoding is finished.
 200
 201   Below is the template of these functions.  */
 202
 203 #if 0
 204 static void
 205 decode_coding_XXXX (coding)
 206      struct coding_system *coding;
 207 {
 208   unsigned char *src = coding->source + coding->consumed;
 209   unsigned char *src_end = coding->source + coding->src_bytes;
 210   /* SRC_BASE remembers the start position in source in each loop.
 211      The loop will be exited when there's not enough source code, or
 212      when there's no room in CHARBUF for a decoded character.  */
 213   unsigned char *src_base;
 214   /* A buffer to produce decoded characters.  */
 215   int *charbuf = coding->charbuf + coding->charbuf_used;
 216   int *charbuf_end = coding->charbuf + coding->charbuf_size;
 217   int multibytep = coding->src_multibyte;
 218
 219   while (1)
 220     {
 221       src_base = src;
 222       if (charbuf < charbuf_end)
 223         /* No more room to produce a decoded character.  */
 224         break;
 225       ONE_MORE_BYTE (c);
 226       /* Decode it. */
 227     }
 228
 229  no_more_source:
 230   if (src_base < src_end
 231       && coding->mode & CODING_MODE_LAST_BLOCK)
 232     /* If the source ends by partial bytes to construct a character,
 233        treat them as eight-bit raw data.  */
 234     while (src_base < src_end && charbuf < charbuf_end)
 235       *charbuf++ = *src_base++;
 236   /* Remember how many bytes and characters we consumed.  If the
 237      source is multibyte, the bytes and chars are not identical.  */
 238   coding->consumed = coding->consumed_char = src_base - coding->source;
 239   /* Remember how many characters we produced.  */
 240   coding->charbuf_used = charbuf - coding->charbuf;
 241 }
 242 #endif
 243
 244 /*** GENERAL NOTES on `encode_coding_XXX ()' functions ***
 245
 246   These functions encode SRC_BYTES length text at SOURCE of Emacs'
 247   internal multibyte format by CODING.  The resulting byte sequence
 248   goes to a place pointed to by DESTINATION, the length of which
 249   should not exceed DST_BYTES.
 250
 251   These functions set the information of original and encoded texts in
 252   the members produced, produced_char, consumed, and consumed_char of
 253   the structure *CODING.  They also set the member result to one of
 254   CODING_RESULT_XXX indicating how the encoding finished.
 255
 256   DST_BYTES zero means that source area and destination area are
 257   overlapped, which means that we can produce a encoded text until it
 258   reaches at the head of not-yet-encoded source text.
 259
 260   Below is a template of these functions.  */
 261 #if 0
 262 static void
 263 encode_coding_XXX (coding)
 264      struct coding_system *coding;
 265 {
 266   int multibytep = coding->dst_multibyte;
 267   int *charbuf = coding->charbuf;
 268   int *charbuf_end = charbuf->charbuf + coding->charbuf_used;
 269   unsigned char *dst = coding->destination + coding->produced;
 270   unsigned char *dst_end = coding->destination + coding->dst_bytes;
 271   unsigned char *adjusted_dst_end = dst_end - _MAX_BYTES_PRODUCED_IN_LOOP_;
 272   int produced_chars = 0;
 273
 274   for (; charbuf < charbuf_end && dst < adjusted_dst_end; charbuf++)
 275     {
 276       int c = *charbuf;
 277       /* Encode C into DST, and increment DST.  */
 278     }
 279  label_no_more_destination:
 280   /* How many chars and bytes we produced.  */
 281   coding->produced_char += produced_chars;
 282   coding->produced = dst - coding->destination;
 283 }
 284 #endif
 285
 286 \f
 287 /*** 1. Preamble ***/
 288
 289 #include <config.h>
 290 #include <stdio.h>
 291
 292 #include "lisp.h"
 293 #include "buffer.h"
 294 #include "character.h"
 295 #include "charset.h"
 296 #include "ccl.h"
 297 #include "composite.h"
 298 #include "coding.h"
 299 #include "window.h"
 300
 301 Lisp_Object Vcoding_system_hash_table;
 302
 303 Lisp_Object Qcoding_system, Qcoding_aliases, Qeol_type;
 304 Lisp_Object Qunix, Qdos;
 305 extern Lisp_Object Qmac;        /* frame.c */
 306 Lisp_Object Qbuffer_file_coding_system;
 307 Lisp_Object Qpost_read_conversion, Qpre_write_conversion;
 308 Lisp_Object Qdefault_char;
 309 Lisp_Object Qno_conversion, Qundecided;
 310 Lisp_Object Qcharset, Qiso_2022, Qutf_8, Qutf_16, Qshift_jis, Qbig5;
 311 Lisp_Object Qbig, Qlittle;
 312 Lisp_Object Qcoding_system_history;
 313 Lisp_Object Qvalid_codes;
 314 Lisp_Object QCcategory, QCmnemonic, QCdefalut_char;
 315 Lisp_Object QCdecode_translation_table, QCencode_translation_table;
 316 Lisp_Object QCpost_read_conversion, QCpre_write_conversion;
 317
 318 extern Lisp_Object Qinsert_file_contents, Qwrite_region;
 319 Lisp_Object Qcall_process, Qcall_process_region, Qprocess_argument;
 320 Lisp_Object Qstart_process, Qopen_network_stream;
 321 Lisp_Object Qtarget_idx;
 322
 323 Lisp_Object Qinsufficient_source, Qinconsistent_eol, Qinvalid_source;
 324 Lisp_Object Qinterrupted, Qinsufficient_memory;
 325
 326 int coding_system_require_warning;
 327
 328 Lisp_Object Vselect_safe_coding_system_function;
 329
 330 /* Mnemonic string for each format of end-of-line.  */
 331 Lisp_Object eol_mnemonic_unix, eol_mnemonic_dos, eol_mnemonic_mac;
 332 /* Mnemonic string to indicate format of end-of-line is not yet
 333    decided.  */
 334 Lisp_Object eol_mnemonic_undecided;
 335
 336 #ifdef emacs
 337
 338 Lisp_Object Vcoding_system_list, Vcoding_system_alist;
 339
 340 Lisp_Object Qcoding_system_p, Qcoding_system_error;
 341
 342 /* Coding system emacs-mule and raw-text are for converting only
 343    end-of-line format.  */
 344 Lisp_Object Qemacs_mule, Qraw_text;
 345 Lisp_Object Qutf_8_emacs;
 346
 347 /* Coding-systems are handed between Emacs Lisp programs and C internal
 348    routines by the following three variables.  */
 349 /* Coding-system for reading files and receiving data from process.  */
 350 Lisp_Object Vcoding_system_for_read;
 351 /* Coding-system for writing files and sending data to process.  */
 352 Lisp_Object Vcoding_system_for_write;
 353 /* Coding-system actually used in the latest I/O.  */
 354 Lisp_Object Vlast_coding_system_used;
 355 /* Set to non-nil when an error is detected while code conversion.  */
 356 Lisp_Object Vlast_code_conversion_error;
 357 /* A vector of length 256 which contains information about special
 358    Latin codes (especially for dealing with Microsoft codes).  */
 359 Lisp_Object Vlatin_extra_code_table;
 360
 361 /* Flag to inhibit code conversion of end-of-line format.  */
 362 int inhibit_eol_conversion;
 363
 364 /* Flag to inhibit ISO2022 escape sequence detection.  */
 365 int inhibit_iso_escape_detection;
 366
 367 /* Flag to make buffer-file-coding-system inherit from process-coding.  */
 368 int inherit_process_coding_system;
 369
 370 /* Coding system to be used to encode text for terminal display.  */
 371 struct coding_system terminal_coding;
 372
 373 /* Coding system to be used to encode text for terminal display when
 374    terminal coding system is nil.  */
 375 struct coding_system safe_terminal_coding;
 376
 377 /* Coding system of what is sent from terminal keyboard.  */
 378 struct coding_system keyboard_coding;
 379
 380 Lisp_Object Vfile_coding_system_alist;
 381 Lisp_Object Vprocess_coding_system_alist;
 382 Lisp_Object Vnetwork_coding_system_alist;
 383
 384 Lisp_Object Vlocale_coding_system;
 385
 386 #endif /* emacs */
 387
 388 /* Flag to tell if we look up translation table on character code
 389    conversion.  */
 390 Lisp_Object Venable_character_translation;
 391 /* Standard translation table to look up on decoding (reading).  */
 392 Lisp_Object Vstandard_translation_table_for_decode;
 393 /* Standard translation table to look up on encoding (writing).  */
 394 Lisp_Object Vstandard_translation_table_for_encode;
 395
 396 Lisp_Object Qtranslation_table;
 397 Lisp_Object Qtranslation_table_id;
 398 Lisp_Object Qtranslation_table_for_decode;
 399 Lisp_Object Qtranslation_table_for_encode;
 400
 401 /* Alist of charsets vs revision number.  */
 402 static Lisp_Object Vcharset_revision_table;
 403
 404 /* Default coding systems used for process I/O.  */
 405 Lisp_Object Vdefault_process_coding_system;
 406
 407 /* Char table for translating Quail and self-inserting input.  */
 408 Lisp_Object Vtranslation_table_for_input;
 409
 410 /* Two special coding systems.  */
 411 Lisp_Object Vsjis_coding_system;
 412 Lisp_Object Vbig5_coding_system;
 413
 414 static void record_conversion_result (struct coding_system *coding,
 415                                       enum coding_result_code result);
 416 static int detect_coding_utf_8 P_ ((struct coding_system *,
 417                                     struct coding_detection_info *info));
 418 static void decode_coding_utf_8 P_ ((struct coding_system *));
 419 static int encode_coding_utf_8 P_ ((struct coding_system *));
 420
 421 static int detect_coding_utf_16 P_ ((struct coding_system *,
 422                                      struct coding_detection_info *info));
 423 static void decode_coding_utf_16 P_ ((struct coding_system *));
 424 static int encode_coding_utf_16 P_ ((struct coding_system *));
 425
 426 static int detect_coding_iso_2022 P_ ((struct coding_system *,
 427                                        struct coding_detection_info *info));
 428 static void decode_coding_iso_2022 P_ ((struct coding_system *));
 429 static int encode_coding_iso_2022 P_ ((struct coding_system *));
 430
 431 static int detect_coding_emacs_mule P_ ((struct coding_system *,
 432                                          struct coding_detection_info *info));
 433 static void decode_coding_emacs_mule P_ ((struct coding_system *));
 434 static int encode_coding_emacs_mule P_ ((struct coding_system *));
 435
 436 static int detect_coding_sjis P_ ((struct coding_system *,
 437                                    struct coding_detection_info *info));
 438 static void decode_coding_sjis P_ ((struct coding_system *));
 439 static int encode_coding_sjis P_ ((struct coding_system *));
 440
 441 static int detect_coding_big5 P_ ((struct coding_system *,
 442                                    struct coding_detection_info *info));
 443 static void decode_coding_big5 P_ ((struct coding_system *));
 444 static int encode_coding_big5 P_ ((struct coding_system *));
 445
 446 static int detect_coding_ccl P_ ((struct coding_system *,
 447                                   struct coding_detection_info *info));
 448 static void decode_coding_ccl P_ ((struct coding_system *));
 449 static int encode_coding_ccl P_ ((struct coding_system *));
 450
 451 static void decode_coding_raw_text P_ ((struct coding_system *));
 452 static int encode_coding_raw_text P_ ((struct coding_system *));
 453
 454
 455 /* ISO2022 section */
 456
 457 #define CODING_ISO_INITIAL(coding, reg)                 \
 458   (XINT (AREF (AREF (CODING_ID_ATTRS ((coding)->id),    \
 459                      coding_attr_iso_initial),          \
 460                reg)))
 461
 462
 463 #define CODING_ISO_REQUEST(coding, charset_id)  \
 464   ((charset_id <= (coding)->max_charset_id      \
 465     ? (coding)->safe_charsets[charset_id]       \
 466     : -1))
 467
 468
 469 #define CODING_ISO_FLAGS(coding)        \
 470   ((coding)->spec.iso_2022.flags)
 471 #define CODING_ISO_DESIGNATION(coding, reg)     \
 472   ((coding)->spec.iso_2022.current_designation[reg])
 473 #define CODING_ISO_INVOCATION(coding, plane)    \
 474   ((coding)->spec.iso_2022.current_invocation[plane])
 475 #define CODING_ISO_SINGLE_SHIFTING(coding)      \
 476   ((coding)->spec.iso_2022.single_shifting)
 477 #define CODING_ISO_BOL(coding)  \
 478   ((coding)->spec.iso_2022.bol)
 479 #define CODING_ISO_INVOKED_CHARSET(coding, plane)       \
 480   CODING_ISO_DESIGNATION ((coding), CODING_ISO_INVOCATION ((coding), (plane)))
 481
 482 /* Control characters of ISO2022.  */
 483                         /* code */      /* function */
 484 #define ISO_CODE_LF     0x0A            /* line-feed */
 485 #define ISO_CODE_CR     0x0D            /* carriage-return */
 486 #define ISO_CODE_SO     0x0E            /* shift-out */
 487 #define ISO_CODE_SI     0x0F            /* shift-in */
 488 #define ISO_CODE_SS2_7  0x19            /* single-shift-2 for 7-bit code */
 489 #define ISO_CODE_ESC    0x1B            /* escape */
 490 #define ISO_CODE_SS2    0x8E            /* single-shift-2 */
 491 #define ISO_CODE_SS3    0x8F            /* single-shift-3 */
 492 #define ISO_CODE_CSI    0x9B            /* control-sequence-introducer */
 493
 494 /* All code (1-byte) of ISO2022 is classified into one of the
 495    followings.  */
 496 enum iso_code_class_type
 497   {
 498     ISO_control_0,              /* Control codes in the range
 499                                    0x00..0x1F and 0x7F, except for the
 500                                    following 5 codes.  */
 501     ISO_shift_out,              /* ISO_CODE_SO (0x0E) */
 502     ISO_shift_in,               /* ISO_CODE_SI (0x0F) */
 503     ISO_single_shift_2_7,       /* ISO_CODE_SS2_7 (0x19) */
 504     ISO_escape,                 /* ISO_CODE_SO (0x1B) */
 505     ISO_control_1,              /* Control codes in the range
 506                                    0x80..0x9F, except for the
 507                                    following 3 codes.  */
 508     ISO_single_shift_2,         /* ISO_CODE_SS2 (0x8E) */
 509     ISO_single_shift_3,         /* ISO_CODE_SS3 (0x8F) */
 510     ISO_control_sequence_introducer, /* ISO_CODE_CSI (0x9B) */
 511     ISO_0x20_or_0x7F,           /* Codes of the values 0x20 or 0x7F.  */
 512     ISO_graphic_plane_0,        /* Graphic codes in the range 0x21..0x7E.  */
 513     ISO_0xA0_or_0xFF,           /* Codes of the values 0xA0 or 0xFF.  */
 514     ISO_graphic_plane_1         /* Graphic codes in the range 0xA1..0xFE.  */
 515   };
 516
 517 /** The macros CODING_ISO_FLAG_XXX defines a flag bit of the
 518     `iso-flags' attribute of an iso2022 coding system.  */
 519
 520 /* If set, produce long-form designation sequence (e.g. ESC $ ( A)
 521    instead of the correct short-form sequence (e.g. ESC $ A).  */
 522 #define CODING_ISO_FLAG_LONG_FORM       0x0001
 523
 524 /* If set, reset graphic planes and registers at end-of-line to the
 525    initial state.  */
 526 #define CODING_ISO_FLAG_RESET_AT_EOL    0x0002
 527
 528 /* If set, reset graphic planes and registers before any control
 529    characters to the initial state.  */
 530 #define CODING_ISO_FLAG_RESET_AT_CNTL   0x0004
 531
 532 /* If set, encode by 7-bit environment.  */
 533 #define CODING_ISO_FLAG_SEVEN_BITS      0x0008
 534
 535 /* If set, use locking-shift function.  */
 536 #define CODING_ISO_FLAG_LOCKING_SHIFT   0x0010
 537
 538 /* If set, use single-shift function.  Overwrite
 539    CODING_ISO_FLAG_LOCKING_SHIFT.  */
 540 #define CODING_ISO_FLAG_SINGLE_SHIFT    0x0020
 541
 542 /* If set, use designation escape sequence.  */
 543 #define CODING_ISO_FLAG_DESIGNATION     0x0040
 544
 545 /* If set, produce revision number sequence.  */
 546 #define CODING_ISO_FLAG_REVISION        0x0080
 547
 548 /* If set, produce ISO6429's direction specifying sequence.  */
 549 #define CODING_ISO_FLAG_DIRECTION       0x0100
 550
 551 /* If set, assume designation states are reset at beginning of line on
 552    output.  */
 553 #define CODING_ISO_FLAG_INIT_AT_BOL     0x0200
 554
 555 /* If set, designation sequence should be placed at beginning of line
 556    on output.  */
 557 #define CODING_ISO_FLAG_DESIGNATE_AT_BOL 0x0400
 558
 559 /* If set, do not encode unsafe charactes on output.  */
 560 #define CODING_ISO_FLAG_SAFE            0x0800
 561
 562 /* If set, extra latin codes (128..159) are accepted as a valid code
 563    on input.  */
 564 #define CODING_ISO_FLAG_LATIN_EXTRA     0x1000
 565
 566 #define CODING_ISO_FLAG_COMPOSITION     0x2000
 567
 568 #define CODING_ISO_FLAG_EUC_TW_SHIFT    0x4000
 569
 570 #define CODING_ISO_FLAG_USE_ROMAN       0x8000
 571
 572 #define CODING_ISO_FLAG_USE_OLDJIS      0x10000
 573
 574 #define CODING_ISO_FLAG_FULL_SUPPORT    0x100000
 575
 576 /* A character to be produced on output if encoding of the original
 577    character is prohibited by CODING_ISO_FLAG_SAFE.  */
 578 #define CODING_INHIBIT_CHARACTER_SUBSTITUTION  '?'
 579
 580
 581 /* UTF-16 section */
 582 #define CODING_UTF_16_BOM(coding)       \
 583   ((coding)->spec.utf_16.bom)
 584
 585 #define CODING_UTF_16_ENDIAN(coding)    \
 586   ((coding)->spec.utf_16.endian)
 587
 588 #define CODING_UTF_16_SURROGATE(coding) \
 589   ((coding)->spec.utf_16.surrogate)
 590
 591
 592 /* CCL section */
 593 #define CODING_CCL_DECODER(coding)      \
 594   AREF (CODING_ID_ATTRS ((coding)->id), coding_attr_ccl_decoder)
 595 #define CODING_CCL_ENCODER(coding)      \
 596   AREF (CODING_ID_ATTRS ((coding)->id), coding_attr_ccl_encoder)
 597 #define CODING_CCL_VALIDS(coding)                                          \
 598   (SDATA (AREF (CODING_ID_ATTRS ((coding)->id), coding_attr_ccl_valids)))
 599
 600 /* Index for each coding category in `coding_categories' */
 601
 602 enum coding_category
 603   {
 604     coding_category_iso_7,
 605     coding_category_iso_7_tight,
 606     coding_category_iso_8_1,
 607     coding_category_iso_8_2,
 608     coding_category_iso_7_else,
 609     coding_category_iso_8_else,
 610     coding_category_utf_8,
 611     coding_category_utf_16_auto,
 612     coding_category_utf_16_be,
 613     coding_category_utf_16_le,
 614     coding_category_utf_16_be_nosig,
 615     coding_category_utf_16_le_nosig,
 616     coding_category_charset,
 617     coding_category_sjis,
 618     coding_category_big5,
 619     coding_category_ccl,
 620     coding_category_emacs_mule,
 621     /* All above are targets of code detection.  */
 622     coding_category_raw_text,
 623     coding_category_undecided,
 624     coding_category_max
 625   };
 626
 627 /* Definitions of flag bits used in detect_coding_XXXX.  */
 628 #define CATEGORY_MASK_ISO_7             (1 << coding_category_iso_7)
 629 #define CATEGORY_MASK_ISO_7_TIGHT       (1 << coding_category_iso_7_tight)
 630 #define CATEGORY_MASK_ISO_8_1           (1 << coding_category_iso_8_1)
 631 #define CATEGORY_MASK_ISO_8_2           (1 << coding_category_iso_8_2)
 632 #define CATEGORY_MASK_ISO_7_ELSE        (1 << coding_category_iso_7_else)
 633 #define CATEGORY_MASK_ISO_8_ELSE        (1 << coding_category_iso_8_else)
 634 #define CATEGORY_MASK_UTF_8             (1 << coding_category_utf_8)
 635 #define CATEGORY_MASK_UTF_16_AUTO       (1 << coding_category_utf_16_auto)
 636 #define CATEGORY_MASK_UTF_16_BE         (1 << coding_category_utf_16_be)
 637 #define CATEGORY_MASK_UTF_16_LE         (1 << coding_category_utf_16_le)
 638 #define CATEGORY_MASK_UTF_16_BE_NOSIG   (1 << coding_category_utf_16_be_nosig)
 639 #define CATEGORY_MASK_UTF_16_LE_NOSIG   (1 << coding_category_utf_16_le_nosig)
 640 #define CATEGORY_MASK_CHARSET           (1 << coding_category_charset)
 641 #define CATEGORY_MASK_SJIS              (1 << coding_category_sjis)
 642 #define CATEGORY_MASK_BIG5              (1 << coding_category_big5)
 643 #define CATEGORY_MASK_CCL               (1 << coding_category_ccl)
 644 #define CATEGORY_MASK_EMACS_MULE        (1 << coding_category_emacs_mule)
 645 #define CATEGORY_MASK_RAW_TEXT          (1 << coding_category_raw_text)
 646
 647 /* This value is returned if detect_coding_mask () find nothing other
 648    than ASCII characters.  */
 649 #define CATEGORY_MASK_ANY               \
 650   (CATEGORY_MASK_ISO_7                  \
 651    | CATEGORY_MASK_ISO_7_TIGHT          \
 652    | CATEGORY_MASK_ISO_8_1              \
 653    | CATEGORY_MASK_ISO_8_2              \
 654    | CATEGORY_MASK_ISO_7_ELSE           \
 655    | CATEGORY_MASK_ISO_8_ELSE           \
 656    | CATEGORY_MASK_UTF_8                \
 657    | CATEGORY_MASK_UTF_16_BE            \
 658    | CATEGORY_MASK_UTF_16_LE            \
 659    | CATEGORY_MASK_UTF_16_BE_NOSIG      \
 660    | CATEGORY_MASK_UTF_16_LE_NOSIG      \
 661    | CATEGORY_MASK_CHARSET              \
 662    | CATEGORY_MASK_SJIS                 \
 663    | CATEGORY_MASK_BIG5                 \
 664    | CATEGORY_MASK_CCL                  \
 665    | CATEGORY_MASK_EMACS_MULE)
 666
 667
 668 #define CATEGORY_MASK_ISO_7BIT \
 669   (CATEGORY_MASK_ISO_7 | CATEGORY_MASK_ISO_7_TIGHT)
 670
 671 #define CATEGORY_MASK_ISO_8BIT \
 672   (CATEGORY_MASK_ISO_8_1 | CATEGORY_MASK_ISO_8_2)
 673
 674 #define CATEGORY_MASK_ISO_ELSE \
 675   (CATEGORY_MASK_ISO_7_ELSE | CATEGORY_MASK_ISO_8_ELSE)
 676
 677 #define CATEGORY_MASK_ISO_ESCAPE        \
 678   (CATEGORY_MASK_ISO_7                  \
 679    | CATEGORY_MASK_ISO_7_TIGHT          \
 680    | CATEGORY_MASK_ISO_7_ELSE           \
 681    | CATEGORY_MASK_ISO_8_ELSE)
 682
 683 #define CATEGORY_MASK_ISO       \
 684   (  CATEGORY_MASK_ISO_7BIT     \
 685      | CATEGORY_MASK_ISO_8BIT   \
 686      | CATEGORY_MASK_ISO_ELSE)
 687
 688 #define CATEGORY_MASK_UTF_16            \
 689   (CATEGORY_MASK_UTF_16_BE              \
 690    | CATEGORY_MASK_UTF_16_LE            \
 691    | CATEGORY_MASK_UTF_16_BE_NOSIG      \
 692    | CATEGORY_MASK_UTF_16_LE_NOSIG)
 693
 694
 695 /* List of symbols `coding-category-xxx' ordered by priority.  This
 696    variable is exposed to Emacs Lisp.  */
 697 static Lisp_Object Vcoding_category_list;
 698
 699 /* Table of coding categories (Lisp symbols).  This variable is for
 700    internal use oly.  */
 701 static Lisp_Object Vcoding_category_table;
 702
 703 /* Table of coding-categories ordered by priority.  */
 704 static enum coding_category coding_priorities[coding_category_max];
 705
 706 /* Nth element is a coding context for the coding system bound to the
 707    Nth coding category.  */
 708 static struct coding_system coding_categories[coding_category_max];
 709
 710 /*** Commonly used macros and functions ***/
 711
 712 #ifndef min
 713 #define min(a, b) ((a) < (b) ? (a) : (b))
 714 #endif
 715 #ifndef max
 716 #define max(a, b) ((a) > (b) ? (a) : (b))
 717 #endif
 718
 719 #define CODING_GET_INFO(coding, attrs, charset_list)    \
 720   do {                                                  \
 721     (attrs) = CODING_ID_ATTRS ((coding)->id);           \
 722     (charset_list) = CODING_ATTR_CHARSET_LIST (attrs);  \
 723   } while (0)
 724
 725
 726 /* Safely get one byte from the source text pointed by SRC which ends
 727    at SRC_END, and set C to that byte.  If there are not enough bytes
 728    in the source, it jumps to `no_more_source'.  If multibytep is
 729    nonzero, and a multibyte character is found at SRC, set C to the
 730    negative value of the character code.  The caller should declare
 731    and set these variables appropriately in advance:
 732         src, src_end, multibytep */
 733
 734 #define ONE_MORE_BYTE(c)                                \
 735   do {                                                  \
 736     if (src == src_end)                                 \
 737       {                                                 \
 738         if (src_base < src)                             \
 739           record_conversion_result                      \
 740             (coding, CODING_RESULT_INSUFFICIENT_SRC);   \
 741         goto no_more_source;                            \
 742       }                                                 \
 743     c = *src++;                                         \
 744     if (multibytep && (c & 0x80))                       \
 745       {                                                 \
 746         if ((c & 0xFE) == 0xC0)                         \
 747           c = ((c & 1) << 6) | *src++;                  \
 748         else                                            \
 749           {                                             \
 750             c = - string_char (--src, &src, NULL);      \
 751             record_conversion_result                    \
 752               (coding, CODING_RESULT_INVALID_SRC);      \
 753           }                                             \
 754       }                                                 \
 755     consumed_chars++;                                   \
 756   } while (0)
 757
 758
 759 #define ONE_MORE_BYTE_NO_CHECK(c)                       \
 760   do {                                                  \
 761     c = *src++;                                         \
 762     if (multibytep && (c & 0x80))                       \
 763       {                                                 \
 764         if ((c & 0xFE) == 0xC0)                         \
 765           c = ((c & 1) << 6) | *src++;                  \
 766         else                                            \
 767           {                                             \
 768             c = - string_char (--src, &src, NULL);      \
 769             record_conversion_result                    \
 770               (coding, CODING_RESULT_INVALID_SRC);      \
 771           }                                             \
 772       }                                                 \
 773     consumed_chars++;                                   \
 774   } while (0)
 775
 776
 777 /* Store a byte C in the place pointed by DST and increment DST to the
 778    next free point, and increment PRODUCED_CHARS.  The caller should
 779    assure that C is 0..127, and declare and set the variable `dst'
 780    appropriately in advance.
 781 */
 782
 783
 784 #define EMIT_ONE_ASCII_BYTE(c)  \
 785   do {                          \
 786     produced_chars++;           \
 787     *dst++ = (c);               \
 788   } while (0)
 789
 790
 791 /* Like EMIT_ONE_ASCII_BYTE byt store two bytes; C1 and C2.  */
 792
 793 #define EMIT_TWO_ASCII_BYTES(c1, c2)    \
 794   do {                                  \
 795     produced_chars += 2;                \
 796     *dst++ = (c1), *dst++ = (c2);       \
 797   } while (0)
 798
 799
 800 /* Store a byte C in the place pointed by DST and increment DST to the
 801    next free point, and increment PRODUCED_CHARS.  If MULTIBYTEP is
 802    nonzero, store in an appropriate multibyte from.  The caller should
 803    declare and set the variables `dst' and `multibytep' appropriately
 804    in advance.  */
 805
 806 #define EMIT_ONE_BYTE(c)                \
 807   do {                                  \
 808     produced_chars++;                   \
 809     if (multibytep)                     \
 810       {                                 \
 811         int ch = (c);                   \
 812         if (ch >= 0x80)                 \
 813           ch = BYTE8_TO_CHAR (ch);      \
 814         CHAR_STRING_ADVANCE (ch, dst);  \
 815       }                                 \
 816     else                                \
 817       *dst++ = (c);                     \
 818   } while (0)
 819
 820
 821 /* Like EMIT_ONE_BYTE, but emit two bytes; C1 and C2.  */
 822
 823 #define EMIT_TWO_BYTES(c1, c2)          \
 824   do {                                  \
 825     produced_chars += 2;                \
 826     if (multibytep)                     \
 827       {                                 \
 828         int ch;                         \
 829                                         \
 830         ch = (c1);                      \
 831         if (ch >= 0x80)                 \
 832           ch = BYTE8_TO_CHAR (ch);      \
 833         CHAR_STRING_ADVANCE (ch, dst);  \
 834         ch = (c2);                      \
 835         if (ch >= 0x80)                 \
 836           ch = BYTE8_TO_CHAR (ch);      \
 837         CHAR_STRING_ADVANCE (ch, dst);  \
 838       }                                 \
 839     else                                \
 840       {                                 \
 841         *dst++ = (c1);                  \
 842         *dst++ = (c2);                  \
 843       }                                 \
 844   } while (0)
 845
 846
 847 #define EMIT_THREE_BYTES(c1, c2, c3)    \
 848   do {                                  \
 849     EMIT_ONE_BYTE (c1);                 \
 850     EMIT_TWO_BYTES (c2, c3);            \
 851   } while (0)
 852
 853
 854 #define EMIT_FOUR_BYTES(c1, c2, c3, c4)         \
 855   do {                                          \
 856     EMIT_TWO_BYTES (c1, c2);                    \
 857     EMIT_TWO_BYTES (c3, c4);                    \
 858   } while (0)
 859
 860
 861 static void
 862 record_conversion_result (struct coding_system *coding,
 863                           enum coding_result_code result)
 864 {
 865   coding->result = result;
 866   switch (result)
 867     {
 868     case CODING_RESULT_INSUFFICIENT_SRC:
 869       Vlast_code_conversion_error = Qinsufficient_source;
 870       break;
 871     case CODING_RESULT_INCONSISTENT_EOL:
 872       Vlast_code_conversion_error = Qinconsistent_eol;
 873       break;
 874     case CODING_RESULT_INVALID_SRC:
 875       Vlast_code_conversion_error = Qinvalid_source;
 876       break;
 877     case CODING_RESULT_INTERRUPT:
 878       Vlast_code_conversion_error = Qinterrupted;
 879       break;
 880     case CODING_RESULT_INSUFFICIENT_MEM:
 881       Vlast_code_conversion_error = Qinsufficient_memory;
 882       break;
 883     }
 884 }
 885
 886 #define CODING_DECODE_CHAR(coding, src, src_base, src_end, charset, code, c) \
 887   do {                                                                       \
 888     charset_map_loaded = 0;                                                  \
 889     c = DECODE_CHAR (charset, code);                                         \
 890     if (charset_map_loaded)                                                  \
 891       {                                                                      \
 892         const unsigned char *orig = coding->source;                          \
 893         EMACS_INT offset;                                                    \
 894                                                                              \
 895         coding_set_source (coding);                                          \
 896         offset = coding->source - orig;                                      \
 897         src += offset;                                                       \
 898         src_base += offset;                                                  \
 899         src_end += offset;                                                   \
 900       }                                                                      \
 901   } while (0)
 902
 903
 904 #define ASSURE_DESTINATION(bytes)                               \
 905   do {                                                          \
 906     if (dst + (bytes) >= dst_end)                               \
 907       {                                                         \
 908         int more_bytes = charbuf_end - charbuf + (bytes);       \
 909                                                                 \
 910         dst = alloc_destination (coding, more_bytes, dst);      \
 911         dst_end = coding->destination + coding->dst_bytes;      \
 912       }                                                         \
 913   } while (0)
 914
 915
 916
 917 static void
 918 coding_set_source (coding)
 919      struct coding_system *coding;
 920 {
 921   if (BUFFERP (coding->src_object))
 922     {
 923       struct buffer *buf = XBUFFER (coding->src_object);
 924
 925       if (coding->src_pos < 0)
 926         coding->source = BUF_GAP_END_ADDR (buf) + coding->src_pos_byte;
 927       else
 928         coding->source = BUF_BYTE_ADDRESS (buf, coding->src_pos_byte);
 929     }
 930   else if (STRINGP (coding->src_object))
 931     {
 932       coding->source = SDATA (coding->src_object) + coding->src_pos_byte;
 933     }
 934   else
 935     /* Otherwise, the source is C string and is never relocated
 936        automatically.  Thus we don't have to update anything.  */
 937     ;
 938 }
 939
 940 static void
 941 coding_set_destination (coding)
 942      struct coding_system *coding;
 943 {
 944   if (BUFFERP (coding->dst_object))
 945     {
 946       if (coding->src_pos < 0)
 947         {
 948           coding->destination = BEG_ADDR + coding->dst_pos_byte - 1;
 949           coding->dst_bytes = (GAP_END_ADDR
 950                                - (coding->src_bytes - coding->consumed)
 951                                - coding->destination);
 952         }
 953       else
 954         {
 955           /* We are sure that coding->dst_pos_byte is before the gap
 956              of the buffer. */
 957           coding->destination = (BUF_BEG_ADDR (XBUFFER (coding->dst_object))
 958                                  + coding->dst_pos_byte - 1);
 959           coding->dst_bytes = (BUF_GAP_END_ADDR (XBUFFER (coding->dst_object))
 960                                - coding->destination);
 961         }
 962     }
 963   else
 964     /* Otherwise, the destination is C string and is never relocated
 965        automatically.  Thus we don't have to update anything.  */
 966     ;
 967 }
 968
 969
 970 static void
 971 coding_alloc_by_realloc (coding, bytes)
 972      struct coding_system *coding;
 973      EMACS_INT bytes;
 974 {
 975   coding->destination = (unsigned char *) xrealloc (coding->destination,
 976                                                     coding->dst_bytes + bytes);
 977   coding->dst_bytes += bytes;
 978 }
 979
 980 static void
 981 coding_alloc_by_making_gap (coding, bytes)
 982      struct coding_system *coding;
 983      EMACS_INT bytes;
 984 {
 985   if (BUFFERP (coding->dst_object)
 986       && EQ (coding->src_object, coding->dst_object))
 987     {
 988       EMACS_INT add = coding->src_bytes - coding->consumed;
 989
 990       GAP_SIZE -= add; ZV += add; Z += add; ZV_BYTE += add; Z_BYTE += add;
 991       make_gap (bytes);
 992       GAP_SIZE += add; ZV -= add; Z -= add; ZV_BYTE -= add; Z_BYTE -= add;
 993     }
 994   else
 995     {
 996       Lisp_Object this_buffer;
 997
 998       this_buffer = Fcurrent_buffer ();
 999       set_buffer_internal (XBUFFER (coding->dst_object));
1000       make_gap (bytes);
1001       set_buffer_internal (XBUFFER (this_buffer));
1002     }
1003 }
1004
1005
1006 static unsigned char *
1007 alloc_destination (coding, nbytes, dst)
1008      struct coding_system *coding;
1009      EMACS_INT nbytes;
1010      unsigned char *dst;
1011 {
1012   EMACS_INT offset = dst - coding->destination;
1013
1014   if (BUFFERP (coding->dst_object))
1015     coding_alloc_by_making_gap (coding, nbytes);
1016   else
1017     coding_alloc_by_realloc (coding, nbytes);
1018   record_conversion_result (coding, CODING_RESULT_SUCCESS);
1019   coding_set_destination (coding);
1020   dst = coding->destination + offset;
1021   return dst;
1022 }
1023
1024 /** Macros for annotations.  */
1025
1026 /* Maximum length of annotation data (sum of annotations for
1027    composition and charset).  */
1028 #define MAX_ANNOTATION_LENGTH (4 + (MAX_COMPOSITION_COMPONENTS * 2) - 1 + 4)
1029
1030 /* An annotation data is stored in the array coding->charbuf in this
1031    format:
1032      [ -LENGTH ANNOTATION_MASK NCHARS ... ]
1033    LENGTH is the number of elements in the annotation.
1034    ANNOTATION_MASK is one of CODING_ANNOTATE_XXX_MASK.
1035    NCHARS is the number of characters in the text annotated.
1036
1037    The format of the following elements depend on ANNOTATION_MASK.
1038
1039    In the case of CODING_ANNOTATE_COMPOSITION_MASK, these elements
1040    follows:
1041      ... METHOD [ COMPOSITION-COMPONENTS ... ]
1042    METHOD is one of enum composition_method.
1043    Optionnal COMPOSITION-COMPONENTS are characters and composition
1044    rules.
1045
1046    In the case of CODING_ANNOTATE_CHARSET_MASK, one element CHARSET-ID
1047    follows.  */
1048
1049 #define ADD_ANNOTATION_DATA(buf, len, mask, nchars)     \
1050   do {                                                  \
1051     *(buf)++ = -(len);                                  \
1052     *(buf)++ = (mask);                                  \
1053     *(buf)++ = (nchars);                                \
1054     coding->annotated = 1;                              \
1055   } while (0);
1056
1057 #define ADD_COMPOSITION_DATA(buf, nchars, method)                           \
1058   do {                                                                      \
1059     ADD_ANNOTATION_DATA (buf, 4, CODING_ANNOTATE_COMPOSITION_MASK, nchars); \
1060     *buf++ = method;                                                        \
1061   } while (0)
1062
1063
1064 #define ADD_CHARSET_DATA(buf, nchars, id)                               \
1065   do {                                                                  \
1066     ADD_ANNOTATION_DATA (buf, 4, CODING_ANNOTATE_CHARSET_MASK, nchars); \
1067     *buf++ = id;                                                        \
1068   } while (0)
1069
1070 \f
1071 /*** 2. Emacs' internal format (emacs-utf-8) ***/
1072
1073
1074
1075 \f
1076 /*** 3. UTF-8 ***/
1077
1078 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
1079    Check if a text is encoded in UTF-8.  If it is, return 1, else
1080    return 0.  */
1081
1082 #define UTF_8_1_OCTET_P(c)         ((c) < 0x80)
1083 #define UTF_8_EXTRA_OCTET_P(c)     (((c) & 0xC0) == 0x80)
1084 #define UTF_8_2_OCTET_LEADING_P(c) (((c) & 0xE0) == 0xC0)
1085 #define UTF_8_3_OCTET_LEADING_P(c) (((c) & 0xF0) == 0xE0)
1086 #define UTF_8_4_OCTET_LEADING_P(c) (((c) & 0xF8) == 0xF0)
1087 #define UTF_8_5_OCTET_LEADING_P(c) (((c) & 0xFC) == 0xF8)
1088
1089 static int
1090 detect_coding_utf_8 (coding, detect_info)
1091      struct coding_system *coding;
1092      struct coding_detection_info *detect_info;
1093 {
1094   const unsigned char *src = coding->source, *src_base;
1095   const unsigned char *src_end = coding->source + coding->src_bytes;
1096   int multibytep = coding->src_multibyte;
1097   int consumed_chars = 0;
1098   int found = 0;
1099
1100   detect_info->checked |= CATEGORY_MASK_UTF_8;
1101   /* A coding system of this category is always ASCII compatible.  */
1102   src += coding->head_ascii;
1103
1104   while (1)
1105     {
1106       int c, c1, c2, c3, c4;
1107
1108       src_base = src;
1109       ONE_MORE_BYTE (c);
1110       if (c < 0 || UTF_8_1_OCTET_P (c))
1111         continue;
1112       ONE_MORE_BYTE (c1);
1113       if (c1 < 0 || ! UTF_8_EXTRA_OCTET_P (c1))
1114         break;
1115       if (UTF_8_2_OCTET_LEADING_P (c))
1116         {
1117           found = CATEGORY_MASK_UTF_8;
1118           continue;
1119         }
1120       ONE_MORE_BYTE (c2);
1121       if (c2 < 0 || ! UTF_8_EXTRA_OCTET_P (c2))
1122         break;
1123       if (UTF_8_3_OCTET_LEADING_P (c))
1124         {
1125           found = CATEGORY_MASK_UTF_8;
1126           continue;
1127         }
1128       ONE_MORE_BYTE (c3);
1129       if (c3 < 0 || ! UTF_8_EXTRA_OCTET_P (c3))
1130         break;
1131       if (UTF_8_4_OCTET_LEADING_P (c))
1132         {
1133           found = CATEGORY_MASK_UTF_8;
1134           continue;
1135         }
1136       ONE_MORE_BYTE (c4);
1137       if (c4 < 0 || ! UTF_8_EXTRA_OCTET_P (c4))
1138         break;
1139       if (UTF_8_5_OCTET_LEADING_P (c))
1140         {
1141           found = CATEGORY_MASK_UTF_8;
1142           continue;
1143         }
1144       break;
1145     }
1146   detect_info->rejected |= CATEGORY_MASK_UTF_8;
1147   return 0;
1148
1149  no_more_source:
1150   if (src_base < src && coding->mode & CODING_MODE_LAST_BLOCK)
1151     {
1152       detect_info->rejected |= CATEGORY_MASK_UTF_8;
1153       return 0;
1154     }
1155   detect_info->found |= found;
1156   return 1;
1157 }
1158
1159
1160 static void
1161 decode_coding_utf_8 (coding)
1162      struct coding_system *coding;
1163 {
1164   const unsigned char *src = coding->source + coding->consumed;
1165   const unsigned char *src_end = coding->source + coding->src_bytes;
1166   const unsigned char *src_base;
1167   int *charbuf = coding->charbuf + coding->charbuf_used;
1168   int *charbuf_end = coding->charbuf + coding->charbuf_size;
1169   int consumed_chars = 0, consumed_chars_base;
1170   int multibytep = coding->src_multibyte;
1171   Lisp_Object attr, charset_list;
1172
1173   CODING_GET_INFO (coding, attr, charset_list);
1174
1175   while (1)
1176     {
1177       int c, c1, c2, c3, c4, c5;
1178
1179       src_base = src;
1180       consumed_chars_base = consumed_chars;
1181
1182       if (charbuf >= charbuf_end)
1183         break;
1184
1185       ONE_MORE_BYTE (c1);
1186       if (c1 < 0)
1187         {
1188           c = - c1;
1189         }
1190       else if (UTF_8_1_OCTET_P(c1))
1191         {
1192           c = c1;
1193         }
1194       else
1195         {
1196           ONE_MORE_BYTE (c2);
1197           if (c2 < 0 || ! UTF_8_EXTRA_OCTET_P (c2))
1198             goto invalid_code;
1199           if (UTF_8_2_OCTET_LEADING_P (c1))
1200             {
1201               c = ((c1 & 0x1F) << 6) | (c2 & 0x3F);
1202               /* Reject overlong sequences here and below.  Encoders
1203                  producing them are incorrect, they can be misleading,
1204                  and they mess up read/write invariance.  */
1205               if (c < 128)
1206                 goto invalid_code;
1207             }
1208           else
1209             {
1210               ONE_MORE_BYTE (c3);
1211               if (c3 < 0 || ! UTF_8_EXTRA_OCTET_P (c3))
1212                 goto invalid_code;
1213               if (UTF_8_3_OCTET_LEADING_P (c1))
1214                 {
1215                   c = (((c1 & 0xF) << 12)
1216                        | ((c2 & 0x3F) << 6) | (c3 & 0x3F));
1217                   if (c < 0x800
1218                       || (c >= 0xd800 && c < 0xe000)) /* surrogates (invalid) */
1219                     goto invalid_code;
1220                 }
1221               else
1222                 {
1223                   ONE_MORE_BYTE (c4);
1224                   if (c4 < 0 || ! UTF_8_EXTRA_OCTET_P (c4))
1225                     goto invalid_code;
1226                   if (UTF_8_4_OCTET_LEADING_P (c1))
1227                     {
1228                     c = (((c1 & 0x7) << 18) | ((c2 & 0x3F) << 12)
1229                          | ((c3 & 0x3F) << 6) | (c4 & 0x3F));
1230                     if (c < 0x10000)
1231                       goto invalid_code;
1232                     }
1233                   else
1234                     {
1235                       ONE_MORE_BYTE (c5);
1236                       if (c5 < 0 || ! UTF_8_EXTRA_OCTET_P (c5))
1237                         goto invalid_code;
1238                       if (UTF_8_5_OCTET_LEADING_P (c1))
1239                         {
1240                           c = (((c1 & 0x3) << 24) | ((c2 & 0x3F) << 18)
1241                                | ((c3 & 0x3F) << 12) | ((c4 & 0x3F) << 6)
1242                                | (c5 & 0x3F));
1243                           if ((c > MAX_CHAR) || (c < 0x200000))
1244                             goto invalid_code;
1245                         }
1246                       else
1247                         goto invalid_code;
1248                     }
1249                 }
1250             }
1251         }
1252
1253       *charbuf++ = c;
1254       continue;
1255
1256     invalid_code:
1257       src = src_base;
1258       consumed_chars = consumed_chars_base;
1259       ONE_MORE_BYTE (c);
1260       *charbuf++ = ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
1261       coding->errors++;
1262     }
1263
1264  no_more_source:
1265   coding->consumed_char += consumed_chars_base;
1266   coding->consumed = src_base - coding->source;
1267   coding->charbuf_used = charbuf - coding->charbuf;
1268 }
1269
1270
1271 static int
1272 encode_coding_utf_8 (coding)
1273      struct coding_system *coding;
1274 {
1275   int multibytep = coding->dst_multibyte;
1276   int *charbuf = coding->charbuf;
1277   int *charbuf_end = charbuf + coding->charbuf_used;
1278   unsigned char *dst = coding->destination + coding->produced;
1279   unsigned char *dst_end = coding->destination + coding->dst_bytes;
1280   int produced_chars = 0;
1281   int c;
1282
1283   if (multibytep)
1284     {
1285       int safe_room = MAX_MULTIBYTE_LENGTH * 2;
1286
1287       while (charbuf < charbuf_end)
1288         {
1289           unsigned char str[MAX_MULTIBYTE_LENGTH], *p, *pend = str;
1290
1291           ASSURE_DESTINATION (safe_room);
1292           c = *charbuf++;
1293           if (CHAR_BYTE8_P (c))
1294             {
1295               c = CHAR_TO_BYTE8 (c);
1296               EMIT_ONE_BYTE (c);
1297             }
1298           else
1299             {
1300               CHAR_STRING_ADVANCE (c, pend);
1301               for (p = str; p < pend; p++)
1302                 EMIT_ONE_BYTE (*p);
1303             }
1304         }
1305     }
1306   else
1307     {
1308       int safe_room = MAX_MULTIBYTE_LENGTH;
1309
1310       while (charbuf < charbuf_end)
1311         {
1312           ASSURE_DESTINATION (safe_room);
1313           c = *charbuf++;
1314           dst += CHAR_STRING (c, dst);
1315           produced_chars++;
1316         }
1317     }
1318   record_conversion_result (coding, CODING_RESULT_SUCCESS);
1319   coding->produced_char += produced_chars;
1320   coding->produced = dst - coding->destination;
1321   return 0;
1322 }
1323
1324
1325 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
1326    Check if a text is encoded in one of UTF-16 based coding systems.
1327    If it is, return 1, else return 0.  */
1328
1329 #define UTF_16_HIGH_SURROGATE_P(val) \
1330   (((val) & 0xFC00) == 0xD800)
1331
1332 #define UTF_16_LOW_SURROGATE_P(val) \
1333   (((val) & 0xFC00) == 0xDC00)
1334
1335 #define UTF_16_INVALID_P(val)   \
1336   (((val) == 0xFFFE)            \
1337    || ((val) == 0xFFFF)         \
1338    || UTF_16_LOW_SURROGATE_P (val))
1339
1340
1341 static int
1342 detect_coding_utf_16 (coding, detect_info)
1343      struct coding_system *coding;
1344      struct coding_detection_info *detect_info;
1345 {
1346   const unsigned char *src = coding->source, *src_base = src;
1347   const unsigned char *src_end = coding->source + coding->src_bytes;
1348   int multibytep = coding->src_multibyte;
1349   int consumed_chars = 0;
1350   int c1, c2;
1351
1352   detect_info->checked |= CATEGORY_MASK_UTF_16;
1353   if (coding->mode & CODING_MODE_LAST_BLOCK
1354       && (coding->src_chars & 1))
1355     {
1356       detect_info->rejected |= CATEGORY_MASK_UTF_16;
1357       return 0;
1358     }
1359
1360   ONE_MORE_BYTE (c1);
1361   ONE_MORE_BYTE (c2);
1362   if ((c1 == 0xFF) && (c2 == 0xFE))
1363     {
1364       detect_info->found |= (CATEGORY_MASK_UTF_16_LE
1365                              | CATEGORY_MASK_UTF_16_AUTO);
1366       detect_info->rejected |= (CATEGORY_MASK_UTF_16_BE
1367                                 | CATEGORY_MASK_UTF_16_BE_NOSIG
1368                                 | CATEGORY_MASK_UTF_16_LE_NOSIG);
1369     }
1370   else if ((c1 == 0xFE) && (c2 == 0xFF))
1371     {
1372       detect_info->found |= (CATEGORY_MASK_UTF_16_BE
1373                              | CATEGORY_MASK_UTF_16_AUTO);
1374       detect_info->rejected |= (CATEGORY_MASK_UTF_16_LE
1375                                 | CATEGORY_MASK_UTF_16_BE_NOSIG
1376                                 | CATEGORY_MASK_UTF_16_LE_NOSIG);
1377     }
1378   else if (c1 >= 0 && c2 >= 0)
1379     {
1380       unsigned char b1[256], b2[256];
1381       int b1_variants = 1, b2_variants = 1;
1382       int n;
1383
1384       bzero (b1, 256), bzero (b2, 256);
1385       b1[c1]++, b2[c2]++;
1386       for (n = 0; n < 256 && src < src_end; n++)
1387         {
1388           src_base = src;
1389           ONE_MORE_BYTE (c1);
1390           ONE_MORE_BYTE (c2);
1391           if (c1 < 0 || c2 < 0)
1392             break;
1393           if (! b1[c1++]) b1_variants++;
1394           if (! b2[c2++]) b2_variants++;
1395         }
1396       if (b1_variants < b2_variants)
1397         detect_info->found |= CATEGORY_MASK_UTF_16_BE_NOSIG;
1398       else
1399         detect_info->found |= CATEGORY_MASK_UTF_16_LE_NOSIG;
1400       detect_info->rejected
1401         |= (CATEGORY_MASK_UTF_16_BE | CATEGORY_MASK_UTF_16_LE);
1402     }
1403  no_more_source:
1404   return 1;
1405 }
1406
1407 static void
1408 decode_coding_utf_16 (coding)
1409      struct coding_system *coding;
1410 {
1411   const unsigned char *src = coding->source + coding->consumed;
1412   const unsigned char *src_end = coding->source + coding->src_bytes;
1413   const unsigned char *src_base;
1414   int *charbuf = coding->charbuf + coding->charbuf_used;
1415   int *charbuf_end = coding->charbuf + coding->charbuf_size;
1416   int consumed_chars = 0, consumed_chars_base;
1417   int multibytep = coding->src_multibyte;
1418   enum utf_16_bom_type bom = CODING_UTF_16_BOM (coding);
1419   enum utf_16_endian_type endian = CODING_UTF_16_ENDIAN (coding);
1420   int surrogate = CODING_UTF_16_SURROGATE (coding);
1421   Lisp_Object attr, charset_list;
1422
1423   CODING_GET_INFO (coding, attr, charset_list);
1424
1425   if (bom == utf_16_with_bom)
1426     {
1427       int c, c1, c2;
1428
1429       src_base = src;
1430       ONE_MORE_BYTE (c1);
1431       ONE_MORE_BYTE (c2);
1432       c = (c1 << 8) | c2;
1433
1434       if (endian == utf_16_big_endian
1435           ? c != 0xFEFF : c != 0xFFFE)
1436         {
1437           /* The first two bytes are not BOM.  Treat them as bytes
1438              for a normal character.  */
1439           src = src_base;
1440           coding->errors++;
1441         }
1442       CODING_UTF_16_BOM (coding) = utf_16_without_bom;
1443     }
1444   else if (bom == utf_16_detect_bom)
1445     {
1446       /* We have already tried to detect BOM and failed in
1447          detect_coding.  */
1448       CODING_UTF_16_BOM (coding) = utf_16_without_bom;
1449     }
1450
1451   while (1)
1452     {
1453       int c, c1, c2;
1454
1455       src_base = src;
1456       consumed_chars_base = consumed_chars;
1457
1458       if (charbuf + 2 >= charbuf_end)
1459         break;
1460
1461       ONE_MORE_BYTE (c1);
1462       if (c1 < 0)
1463         {
1464           *charbuf++ = -c1;
1465           continue;
1466         }
1467       ONE_MORE_BYTE (c2);
1468       if (c2 < 0)
1469         {
1470           *charbuf++ = ASCII_BYTE_P (c1) ? c1 : BYTE8_TO_CHAR (c1);
1471           *charbuf++ = -c2;
1472           continue;
1473         }
1474       c = (endian == utf_16_big_endian
1475            ? ((c1 << 8) | c2) : ((c2 << 8) | c1));
1476       if (surrogate)
1477         {
1478           if (! UTF_16_LOW_SURROGATE_P (c))
1479             {
1480               if (endian == utf_16_big_endian)
1481                 c1 = surrogate >> 8, c2 = surrogate & 0xFF;
1482               else
1483                 c1 = surrogate & 0xFF, c2 = surrogate >> 8;
1484               *charbuf++ = c1;
1485               *charbuf++ = c2;
1486               coding->errors++;
1487               if (UTF_16_HIGH_SURROGATE_P (c))
1488                 CODING_UTF_16_SURROGATE (coding) = surrogate = c;
1489               else
1490                 *charbuf++ = c;
1491             }
1492           else
1493             {
1494               c = ((surrogate - 0xD800) << 10) | (c - 0xDC00);
1495               CODING_UTF_16_SURROGATE (coding) = surrogate = 0;
1496               *charbuf++ = 0x10000 + c;
1497             }
1498         }
1499       else
1500         {
1501           if (UTF_16_HIGH_SURROGATE_P (c))
1502             CODING_UTF_16_SURROGATE (coding) = surrogate = c;
1503           else
1504             *charbuf++ = c;
1505         }
1506     }
1507
1508  no_more_source:
1509   coding->consumed_char += consumed_chars_base;
1510   coding->consumed = src_base - coding->source;
1511   coding->charbuf_used = charbuf - coding->charbuf;
1512 }
1513
1514 static int
1515 encode_coding_utf_16 (coding)
1516      struct coding_system *coding;
1517 {
1518   int multibytep = coding->dst_multibyte;
1519   int *charbuf = coding->charbuf;
1520   int *charbuf_end = charbuf + coding->charbuf_used;
1521   unsigned char *dst = coding->destination + coding->produced;
1522   unsigned char *dst_end = coding->destination + coding->dst_bytes;
1523   int safe_room = 8;
1524   enum utf_16_bom_type bom = CODING_UTF_16_BOM (coding);
1525   int big_endian = CODING_UTF_16_ENDIAN (coding) == utf_16_big_endian;
1526   int produced_chars = 0;
1527   Lisp_Object attrs, charset_list;
1528   int c;
1529
1530   CODING_GET_INFO (coding, attrs, charset_list);
1531
1532   if (bom != utf_16_without_bom)
1533     {
1534       ASSURE_DESTINATION (safe_room);
1535       if (big_endian)
1536         EMIT_TWO_BYTES (0xFE, 0xFF);
1537       else
1538         EMIT_TWO_BYTES (0xFF, 0xFE);
1539       CODING_UTF_16_BOM (coding) = utf_16_without_bom;
1540     }
1541
1542   while (charbuf < charbuf_end)
1543     {
1544       ASSURE_DESTINATION (safe_room);
1545       c = *charbuf++;
1546       if (c >= MAX_UNICODE_CHAR)
1547         c = coding->default_char;
1548
1549       if (c < 0x10000)
1550         {
1551           if (big_endian)
1552             EMIT_TWO_BYTES (c >> 8, c & 0xFF);
1553           else
1554             EMIT_TWO_BYTES (c & 0xFF, c >> 8);
1555         }
1556       else
1557         {
1558           int c1, c2;
1559
1560           c -= 0x10000;
1561           c1 = (c >> 10) + 0xD800;
1562           c2 = (c & 0x3FF) + 0xDC00;
1563           if (big_endian)
1564             EMIT_FOUR_BYTES (c1 >> 8, c1 & 0xFF, c2 >> 8, c2 & 0xFF);
1565           else
1566             EMIT_FOUR_BYTES (c1 & 0xFF, c1 >> 8, c2 & 0xFF, c2 >> 8);
1567         }
1568     }
1569   record_conversion_result (coding, CODING_RESULT_SUCCESS);
1570   coding->produced = dst - coding->destination;
1571   coding->produced_char += produced_chars;
1572   return 0;
1573 }
1574
1575 \f
1576 /*** 6. Old Emacs' internal format (emacs-mule) ***/
1577
1578 /* Emacs' internal format for representation of multiple character
1579    sets is a kind of multi-byte encoding, i.e. characters are
1580    represented by variable-length sequences of one-byte codes.
1581
1582    ASCII characters and control characters (e.g. `tab', `newline') are
1583    represented by one-byte sequences which are their ASCII codes, in
1584    the range 0x00 through 0x7F.
1585
1586    8-bit characters of the range 0x80..0x9F are represented by
1587    two-byte sequences of LEADING_CODE_8_BIT_CONTROL and (their 8-bit
1588    code + 0x20).
1589
1590    8-bit characters of the range 0xA0..0xFF are represented by
1591    one-byte sequences which are their 8-bit code.
1592
1593    The other characters are represented by a sequence of `base
1594    leading-code', optional `extended leading-code', and one or two
1595    `position-code's.  The length of the sequence is determined by the
1596    base leading-code.  Leading-code takes the range 0x81 through 0x9D,
1597    whereas extended leading-code and position-code take the range 0xA0
1598    through 0xFF.  See `charset.h' for more details about leading-code
1599    and position-code.
1600
1601    --- CODE RANGE of Emacs' internal format ---
1602    character set        range
1603    -------------        -----
1604    ascii                0x00..0x7F
1605    eight-bit-control    LEADING_CODE_8_BIT_CONTROL + 0xA0..0xBF
1606    eight-bit-graphic    0xA0..0xBF
1607    ELSE                 0x81..0x9D + [0xA0..0xFF]+
1608    ---------------------------------------------
1609
1610    As this is the internal character representation, the format is
1611    usually not used externally (i.e. in a file or in a data sent to a
1612    process).  But, it is possible to have a text externally in this
1613    format (i.e. by encoding by the coding system `emacs-mule').
1614
1615    In that case, a sequence of one-byte codes has a slightly different
1616    form.
1617
1618    At first, all characters in eight-bit-control are represented by
1619    one-byte sequences which are their 8-bit code.
1620
1621    Next, character composition data are represented by the byte
1622    sequence of the form: 0x80 METHOD BYTES CHARS COMPONENT ...,
1623    where,
1624         METHOD is 0xF0 plus one of composition method (enum
1625         composition_method),
1626
1627         BYTES is 0xA0 plus a byte length of this composition data,
1628
1629         CHARS is 0x20 plus a number of characters composed by this
1630         data,
1631
1632         COMPONENTs are characters of multibye form or composition
1633         rules encoded by two-byte of ASCII codes.
1634
1635    In addition, for backward compatibility, the following formats are
1636    also recognized as composition data on decoding.
1637
1638    0x80 MSEQ ...
1639    0x80 0xFF MSEQ RULE MSEQ RULE ... MSEQ
1640
1641    Here,
1642         MSEQ is a multibyte form but in these special format:
1643           ASCII: 0xA0 ASCII_CODE+0x80,
1644           other: LEADING_CODE+0x20 FOLLOWING-BYTE ...,
1645         RULE is a one byte code of the range 0xA0..0xF0 that
1646         represents a composition rule.
1647   */
1648
1649 char emacs_mule_bytes[256];
1650
1651 int
1652 emacs_mule_char (coding, src, nbytes, nchars, id)
1653      struct coding_system *coding;
1654      const unsigned char *src;
1655      int *nbytes, *nchars, *id;
1656 {
1657   const unsigned char *src_end = coding->source + coding->src_bytes;
1658   const unsigned char *src_base = src;
1659   int multibytep = coding->src_multibyte;
1660   struct charset *charset;
1661   unsigned code;
1662   int c;
1663   int consumed_chars = 0;
1664
1665   ONE_MORE_BYTE (c);
1666   if (c < 0)
1667     {
1668       c = -c;
1669       charset = emacs_mule_charset[0];
1670     }
1671   else
1672     {
1673       switch (emacs_mule_bytes[c])
1674         {
1675         case 2:
1676           if (! (charset = emacs_mule_charset[c]))
1677             goto invalid_code;
1678           ONE_MORE_BYTE (c);
1679           if (c < 0)
1680             goto invalid_code;
1681           code = c & 0x7F;
1682           break;
1683
1684         case 3:
1685           if (c == EMACS_MULE_LEADING_CODE_PRIVATE_11
1686               || c == EMACS_MULE_LEADING_CODE_PRIVATE_12)
1687             {
1688               ONE_MORE_BYTE (c);
1689               if (c < 0 || ! (charset = emacs_mule_charset[c]))
1690                 goto invalid_code;
1691               ONE_MORE_BYTE (c);
1692               if (c < 0)
1693                 goto invalid_code;
1694               code = c & 0x7F;
1695             }
1696           else
1697             {
1698               if (! (charset = emacs_mule_charset[c]))
1699                 goto invalid_code;
1700               ONE_MORE_BYTE (c);
1701               if (c < 0)
1702                 goto invalid_code;
1703               code = (c & 0x7F) << 8;
1704               ONE_MORE_BYTE (c);
1705               if (c < 0)
1706                 goto invalid_code;
1707               code |= c & 0x7F;
1708             }
1709           break;
1710
1711         case 4:
1712           ONE_MORE_BYTE (c);
1713           if (c < 0 || ! (charset = emacs_mule_charset[c]))
1714             goto invalid_code;
1715           ONE_MORE_BYTE (c);
1716           if (c < 0)
1717             goto invalid_code;
1718           code = (c & 0x7F) << 8;
1719           ONE_MORE_BYTE (c);
1720           if (c < 0)
1721             goto invalid_code;
1722           code |= c & 0x7F;
1723           break;
1724
1725         case 1:
1726           code = c;
1727           charset = CHARSET_FROM_ID (ASCII_BYTE_P (code)
1728                                      ? charset_ascii : charset_eight_bit);
1729           break;
1730
1731         default:
1732           abort ();
1733         }
1734       c = DECODE_CHAR (charset, code);
1735       if (c < 0)
1736         goto invalid_code;
1737     }
1738   *nbytes = src - src_base;
1739   *nchars = consumed_chars;
1740   if (id)
1741     *id = charset->id;
1742   return c;
1743
1744  no_more_source:
1745   return -2;
1746
1747  invalid_code:
1748   return -1;
1749 }
1750
1751
1752 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
1753    Check if a text is encoded in `emacs-mule'.  If it is, return 1,
1754    else return 0.  */
1755
1756 static int
1757 detect_coding_emacs_mule (coding, detect_info)
1758      struct coding_system *coding;
1759      struct coding_detection_info *detect_info;
1760 {
1761   const unsigned char *src = coding->source, *src_base;
1762   const unsigned char *src_end = coding->source + coding->src_bytes;
1763   int multibytep = coding->src_multibyte;
1764   int consumed_chars = 0;
1765   int c;
1766   int found = 0;
1767
1768   detect_info->checked |= CATEGORY_MASK_EMACS_MULE;
1769   /* A coding system of this category is always ASCII compatible.  */
1770   src += coding->head_ascii;
1771
1772   while (1)
1773     {
1774       src_base = src;
1775       ONE_MORE_BYTE (c);
1776       if (c < 0)
1777         continue;
1778       if (c == 0x80)
1779         {
1780           /* Perhaps the start of composite character.  We simple skip
1781              it because analyzing it is too heavy for detecting.  But,
1782              at least, we check that the composite character
1783              constitues of more than 4 bytes.  */
1784           const unsigned char *src_base;
1785
1786         repeat:
1787           src_base = src;
1788           do
1789             {
1790               ONE_MORE_BYTE (c);
1791             }
1792           while (c >= 0xA0);
1793
1794           if (src - src_base <= 4)
1795             break;
1796           found = CATEGORY_MASK_EMACS_MULE;
1797           if (c == 0x80)
1798             goto repeat;
1799         }
1800
1801       if (c < 0x80)
1802         {
1803           if (c < 0x20
1804               && (c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO))
1805             break;
1806         }
1807       else
1808         {
1809           int more_bytes = emacs_mule_bytes[*src_base] - 1;
1810
1811           while (more_bytes > 0)
1812             {
1813               ONE_MORE_BYTE (c);
1814               if (c < 0xA0)
1815                 {
1816                   src--;        /* Unread the last byte.  */
1817                   break;
1818                 }
1819               more_bytes--;
1820             }
1821           if (more_bytes != 0)
1822             break;
1823           found = CATEGORY_MASK_EMACS_MULE;
1824         }
1825     }
1826   detect_info->rejected |= CATEGORY_MASK_EMACS_MULE;
1827   return 0;
1828
1829  no_more_source:
1830   if (src_base < src && coding->mode & CODING_MODE_LAST_BLOCK)
1831     {
1832       detect_info->rejected |= CATEGORY_MASK_EMACS_MULE;
1833       return 0;
1834     }
1835   detect_info->found |= found;
1836   return 1;
1837 }
1838
1839
1840 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".  */
1841
1842 /* Decode a character represented as a component of composition
1843    sequence of Emacs 20/21 style at SRC.  Set C to that character and
1844    update SRC to the head of next character (or an encoded composition
1845    rule).  If SRC doesn't points a composition component, set C to -1.
1846    If SRC points an invalid byte sequence, global exit by a return
1847    value 0.  */
1848
1849 #define DECODE_EMACS_MULE_COMPOSITION_CHAR(buf)                 \
1850   if (1)                                                        \
1851     {                                                           \
1852       int c;                                                    \
1853       int nbytes, nchars;                                       \
1854                                                                 \
1855       if (src == src_end)                                       \
1856         break;                                                  \
1857       c = emacs_mule_char (coding, src, &nbytes, &nchars, NULL);\
1858       if (c < 0)                                                \
1859         {                                                       \
1860           if (c == -2)                                          \
1861             break;                                              \
1862           goto invalid_code;                                    \
1863         }                                                       \
1864       *buf++ = c;                                               \
1865       src += nbytes;                                            \
1866       consumed_chars += nchars;                                 \
1867     }                                                           \
1868   else
1869
1870
1871 /* Decode a composition rule represented as a component of composition
1872    sequence of Emacs 20 style at SRC.  Store the decoded rule in *BUF,
1873    and increment BUF.  If SRC points an invalid byte sequence, set C
1874    to -1.  */
1875
1876 #define DECODE_EMACS_MULE_COMPOSITION_RULE_20(buf)      \
1877   do {                                                  \
1878     int c, gref, nref;                                  \
1879                                                         \
1880     if (src >= src_end)                                 \
1881       goto invalid_code;                                \
1882     ONE_MORE_BYTE_NO_CHECK (c);                         \
1883     c -= 0x20;                                          \
1884     if (c < 0 || c >= 81)                               \
1885       goto invalid_code;                                \
1886                                                         \
1887     gref = c / 9, nref = c % 9;                         \
1888     *buf++ = COMPOSITION_ENCODE_RULE (gref, nref);      \
1889   } while (0)
1890
1891
1892 /* Decode a composition rule represented as a component of composition
1893    sequence of Emacs 21 style at SRC.  Store the decoded rule in *BUF,
1894    and increment BUF.  If SRC points an invalid byte sequence, set C
1895    to -1.  */
1896
1897 #define DECODE_EMACS_MULE_COMPOSITION_RULE_21(buf)      \
1898   do {                                                  \
1899     int gref, nref;                                     \
1900                                                         \
1901     if (src + 1>= src_end)                              \
1902       goto invalid_code;                                \
1903     ONE_MORE_BYTE_NO_CHECK (gref);                      \
1904     gref -= 0x20;                                       \
1905     ONE_MORE_BYTE_NO_CHECK (nref);                      \
1906     nref -= 0x20;                                       \
1907     if (gref < 0 || gref >= 81                          \
1908         || nref < 0 || nref >= 81)                      \
1909       goto invalid_code;                                \
1910     *buf++ = COMPOSITION_ENCODE_RULE (gref, nref);      \
1911   } while (0)
1912
1913
1914 #define DECODE_EMACS_MULE_21_COMPOSITION(c)                             \
1915   do {                                                                  \
1916     /* Emacs 21 style format.  The first three bytes at SRC are         \
1917        (METHOD - 0xF2), (BYTES - 0xA0), (CHARS - 0xA0), where BYTES is  \
1918        the byte length of this composition information, CHARS is the    \
1919        number of characters composed by this composition.  */           \
1920     enum composition_method method = c - 0xF2;                          \
1921     int *charbuf_base = charbuf;                                        \
1922     int consumed_chars_limit;                                           \
1923     int nbytes, nchars;                                                 \
1924                                                                         \
1925     ONE_MORE_BYTE (c);                                                  \
1926     if (c < 0)                                                          \
1927       goto invalid_code;                                                \
1928     nbytes = c - 0xA0;                                                  \
1929     if (nbytes < 3)                                                     \
1930       goto invalid_code;                                                \
1931     ONE_MORE_BYTE (c);                                                  \
1932     if (c < 0)                                                          \
1933       goto invalid_code;                                                \
1934     nchars = c - 0xA0;                                                  \
1935     ADD_COMPOSITION_DATA (charbuf, nchars, method);                     \
1936     consumed_chars_limit = consumed_chars_base + nbytes;                \
1937     if (method != COMPOSITION_RELATIVE)                                 \
1938       {                                                                 \
1939         int i = 0;                                                      \
1940         while (consumed_chars < consumed_chars_limit)                   \
1941           {                                                             \
1942             if (i % 2 && method != COMPOSITION_WITH_ALTCHARS)           \
1943               DECODE_EMACS_MULE_COMPOSITION_RULE_21 (charbuf);          \
1944             else                                                        \
1945               DECODE_EMACS_MULE_COMPOSITION_CHAR (charbuf);             \
1946             i++;                                                        \
1947           }                                                             \
1948         if (consumed_chars < consumed_chars_limit)                      \
1949           goto invalid_code;                                            \
1950         charbuf_base[0] -= i;                                           \
1951       }                                                                 \
1952   } while (0)
1953
1954
1955 #define DECODE_EMACS_MULE_20_RELATIVE_COMPOSITION(c)            \
1956   do {                                                          \
1957     /* Emacs 20 style format for relative composition.  */      \
1958     /* Store multibyte form of characters to be composed.  */   \
1959     enum composition_method method = COMPOSITION_RELATIVE;      \
1960     int components[MAX_COMPOSITION_COMPONENTS * 2 - 1];         \
1961     int *buf = components;                                      \
1962     int i, j;                                                   \
1963                                                                 \
1964     src = src_base;                                             \
1965     ONE_MORE_BYTE (c);          /* skip 0x80 */                 \
1966     for (i = 0; i < MAX_COMPOSITION_COMPONENTS; i++)            \
1967       DECODE_EMACS_MULE_COMPOSITION_CHAR (buf);                 \
1968     if (i < 2)                                                  \
1969       goto invalid_code;                                        \
1970     ADD_COMPOSITION_DATA (charbuf, i, method);                  \
1971     for (j = 0; j < i; j++)                                     \
1972       *charbuf++ = components[j];                               \
1973   } while (0)
1974
1975
1976 #define DECODE_EMACS_MULE_20_RULEBASE_COMPOSITION(c)            \
1977   do {                                                          \
1978     /* Emacs 20 style format for rule-base composition.  */     \
1979     /* Store multibyte form of characters to be composed.  */   \
1980     enum composition_method method = COMPOSITION_WITH_RULE;     \
1981     int components[MAX_COMPOSITION_COMPONENTS * 2 - 1];         \
1982     int *buf = components;                                      \
1983     int i, j;                                                   \
1984                                                                 \
1985     DECODE_EMACS_MULE_COMPOSITION_CHAR (buf);                   \
1986     for (i = 0; i < MAX_COMPOSITION_COMPONENTS; i++)            \
1987       {                                                         \
1988         DECODE_EMACS_MULE_COMPOSITION_RULE_20 (buf);            \
1989         DECODE_EMACS_MULE_COMPOSITION_CHAR (buf);               \
1990       }                                                         \
1991     if (i < 1 || (buf - components) % 2 == 0)                   \
1992       goto invalid_code;                                        \
1993     if (charbuf + i + (i / 2) + 1 < charbuf_end)                \
1994       goto no_more_source;                                      \
1995     ADD_COMPOSITION_DATA (buf, i, method);                      \
1996     for (j = 0; j < i; j++)                                     \
1997       *charbuf++ = components[j];                               \
1998     for (j = 0; j < i; j += 2)                                  \
1999       *charbuf++ = components[j];                               \
2000   } while (0)
2001
2002
2003 static void
2004 decode_coding_emacs_mule (coding)
2005      struct coding_system *coding;
2006 {
2007   const unsigned char *src = coding->source + coding->consumed;
2008   const unsigned char *src_end = coding->source + coding->src_bytes;
2009   const unsigned char *src_base;
2010   int *charbuf = coding->charbuf + coding->charbuf_used;
2011   int *charbuf_end
2012     = coding->charbuf + coding->charbuf_size - MAX_ANNOTATION_LENGTH;
2013   int consumed_chars = 0, consumed_chars_base;
2014   int multibytep = coding->src_multibyte;
2015   Lisp_Object attrs, charset_list;
2016   int char_offset = coding->produced_char;
2017   int last_offset = char_offset;
2018   int last_id = charset_ascii;
2019
2020   CODING_GET_INFO (coding, attrs, charset_list);
2021
2022   while (1)
2023     {
2024       int c;
2025
2026       src_base = src;
2027       consumed_chars_base = consumed_chars;
2028
2029       if (charbuf >= charbuf_end)
2030         break;
2031
2032       ONE_MORE_BYTE (c);
2033       if (c < 0)
2034         {
2035           *charbuf++ = -c;
2036           char_offset++;
2037         }
2038       else if (c < 0x80)
2039         {
2040           *charbuf++ = c;
2041           char_offset++;
2042         }
2043       else if (c == 0x80)
2044         {
2045           ONE_MORE_BYTE (c);
2046           if (c < 0)
2047             goto invalid_code;
2048           if (c - 0xF2 >= COMPOSITION_RELATIVE
2049               && c - 0xF2 <= COMPOSITION_WITH_RULE_ALTCHARS)
2050             DECODE_EMACS_MULE_21_COMPOSITION (c);
2051           else if (c < 0xC0)
2052             DECODE_EMACS_MULE_20_RELATIVE_COMPOSITION (c);
2053           else if (c == 0xFF)
2054             DECODE_EMACS_MULE_20_RULEBASE_COMPOSITION (c);
2055           else
2056             goto invalid_code;
2057         }
2058       else if (c < 0xA0 && emacs_mule_bytes[c] > 1)
2059         {
2060           int nbytes, nchars;
2061           int id;
2062
2063           src = src_base;
2064           consumed_chars = consumed_chars_base;
2065           c = emacs_mule_char (coding, src, &nbytes, &nchars, &id);
2066           if (c < 0)
2067             {
2068               if (c == -2)
2069                 break;
2070               goto invalid_code;
2071             }
2072           if (last_id != id)
2073             {
2074               if (last_id != charset_ascii)
2075                 ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
2076               last_id = id;
2077               last_offset = char_offset;
2078             }
2079           *charbuf++ = c;
2080           src += nbytes;
2081           consumed_chars += nchars;
2082           char_offset++;
2083         }
2084       continue;
2085
2086     invalid_code:
2087       src = src_base;
2088       consumed_chars = consumed_chars_base;
2089       ONE_MORE_BYTE (c);
2090       *charbuf++ = ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
2091       char_offset++;
2092       coding->errors++;
2093     }
2094
2095  no_more_source:
2096   if (last_id != charset_ascii)
2097     ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
2098   coding->consumed_char += consumed_chars_base;
2099   coding->consumed = src_base - coding->source;
2100   coding->charbuf_used = charbuf - coding->charbuf;
2101 }
2102
2103
2104 #define EMACS_MULE_LEADING_CODES(id, codes)     \
2105   do {                                          \
2106     if (id < 0xA0)                              \
2107       codes[0] = id, codes[1] = 0;              \
2108     else if (id < 0xE0)                         \
2109       codes[0] = 0x9A, codes[1] = id;           \
2110     else if (id < 0xF0)                         \
2111       codes[0] = 0x9B, codes[1] = id;           \
2112     else if (id < 0xF5)                         \
2113       codes[0] = 0x9C, codes[1] = id;           \
2114     else                                        \
2115       codes[0] = 0x9D, codes[1] = id;           \
2116   } while (0);
2117
2118
2119 static int
2120 encode_coding_emacs_mule (coding)
2121      struct coding_system *coding;
2122 {
2123   int multibytep = coding->dst_multibyte;
2124   int *charbuf = coding->charbuf;
2125   int *charbuf_end = charbuf + coding->charbuf_used;
2126   unsigned char *dst = coding->destination + coding->produced;
2127   unsigned char *dst_end = coding->destination + coding->dst_bytes;
2128   int safe_room = 8;
2129   int produced_chars = 0;
2130   Lisp_Object attrs, charset_list;
2131   int c;
2132   int preferred_charset_id = -1;
2133
2134   CODING_GET_INFO (coding, attrs, charset_list);
2135   if (! EQ (charset_list, Vemacs_mule_charset_list))
2136     {
2137       CODING_ATTR_CHARSET_LIST (attrs)
2138         = charset_list = Vemacs_mule_charset_list;
2139     }
2140
2141   while (charbuf < charbuf_end)
2142     {
2143       ASSURE_DESTINATION (safe_room);
2144       c = *charbuf++;
2145
2146       if (c < 0)
2147         {
2148           /* Handle an annotation.  */
2149           switch (*charbuf)
2150             {
2151             case CODING_ANNOTATE_COMPOSITION_MASK:
2152               /* Not yet implemented.  */
2153               break;
2154             case CODING_ANNOTATE_CHARSET_MASK:
2155               preferred_charset_id = charbuf[3];
2156               if (preferred_charset_id >= 0
2157                   && NILP (Fmemq (make_number (preferred_charset_id),
2158                                   charset_list)))
2159                 preferred_charset_id = -1;
2160               break;
2161             default:
2162               abort ();
2163             }
2164           charbuf += -c - 1;
2165           continue;
2166         }
2167
2168       if (ASCII_CHAR_P (c))
2169         EMIT_ONE_ASCII_BYTE (c);
2170       else if (CHAR_BYTE8_P (c))
2171         {
2172           c = CHAR_TO_BYTE8 (c);
2173           EMIT_ONE_BYTE (c);
2174         }
2175       else
2176         {
2177           struct charset *charset;
2178           unsigned code;
2179           int dimension;
2180           int emacs_mule_id;
2181           unsigned char leading_codes[2];
2182
2183           if (preferred_charset_id >= 0)
2184             {
2185               charset = CHARSET_FROM_ID (preferred_charset_id);
2186               if (! CHAR_CHARSET_P (c, charset))
2187                 charset = char_charset (c, charset_list, NULL);
2188             }
2189           else
2190             charset = char_charset (c, charset_list, &code);
2191           if (! charset)
2192             {
2193               c = coding->default_char;
2194               if (ASCII_CHAR_P (c))
2195                 {
2196                   EMIT_ONE_ASCII_BYTE (c);
2197                   continue;
2198                 }
2199               charset = char_charset (c, charset_list, &code);
2200             }
2201           dimension = CHARSET_DIMENSION (charset);
2202           emacs_mule_id = CHARSET_EMACS_MULE_ID (charset);
2203           EMACS_MULE_LEADING_CODES (emacs_mule_id, leading_codes);
2204           EMIT_ONE_BYTE (leading_codes[0]);
2205           if (leading_codes[1])
2206             EMIT_ONE_BYTE (leading_codes[1]);
2207           if (dimension == 1)
2208             EMIT_ONE_BYTE (code | 0x80);
2209           else
2210             {
2211               code |= 0x8080;
2212               EMIT_ONE_BYTE (code >> 8);
2213               EMIT_ONE_BYTE (code & 0xFF);
2214             }
2215         }
2216     }
2217   record_conversion_result (coding, CODING_RESULT_SUCCESS);
2218   coding->produced_char += produced_chars;
2219   coding->produced = dst - coding->destination;
2220   return 0;
2221 }
2222
2223 \f
2224 /*** 7. ISO2022 handlers ***/
2225
2226 /* The following note describes the coding system ISO2022 briefly.
2227    Since the intention of this note is to help understand the
2228    functions in this file, some parts are NOT ACCURATE or are OVERLY
2229    SIMPLIFIED.  For thorough understanding, please refer to the
2230    original document of ISO2022.  This is equivalent to the standard
2231    ECMA-35, obtainable from <URL:http://www.ecma.ch/> (*).
2232
2233    ISO2022 provides many mechanisms to encode several character sets
2234    in 7-bit and 8-bit environments.  For 7-bit environments, all text
2235    is encoded using bytes less than 128.  This may make the encoded
2236    text a little bit longer, but the text passes more easily through
2237    several types of gateway, some of which strip off the MSB (Most
2238    Significant Bit).
2239
2240    There are two kinds of character sets: control character sets and
2241    graphic character sets.  The former contain control characters such
2242    as `newline' and `escape' to provide control functions (control
2243    functions are also provided by escape sequences).  The latter
2244    contain graphic characters such as 'A' and '-'.  Emacs recognizes
2245    two control character sets and many graphic character sets.
2246
2247    Graphic character sets are classified into one of the following
2248    four classes, according to the number of bytes (DIMENSION) and
2249    number of characters in one dimension (CHARS) of the set:
2250    - DIMENSION1_CHARS94
2251    - DIMENSION1_CHARS96
2252    - DIMENSION2_CHARS94
2253    - DIMENSION2_CHARS96
2254
2255    In addition, each character set is assigned an identification tag,
2256    unique for each set, called the "final character" (denoted as <F>
2257    hereafter).  The <F> of each character set is decided by ECMA(*)
2258    when it is registered in ISO.  The code range of <F> is 0x30..0x7F
2259    (0x30..0x3F are for private use only).
2260
2261    Note (*): ECMA = European Computer Manufacturers Association
2262
2263    Here are examples of graphic character sets [NAME(<F>)]:
2264         o DIMENSION1_CHARS94 -- ASCII('B'), right-half-of-JISX0201('I'), ...
2265         o DIMENSION1_CHARS96 -- right-half-of-ISO8859-1('A'), ...
2266         o DIMENSION2_CHARS94 -- GB2312('A'), JISX0208('B'), ...
2267         o DIMENSION2_CHARS96 -- none for the moment
2268
2269    A code area (1 byte=8 bits) is divided into 4 areas, C0, GL, C1, and GR.
2270         C0 [0x00..0x1F] -- control character plane 0
2271         GL [0x20..0x7F] -- graphic character plane 0
2272         C1 [0x80..0x9F] -- control character plane 1
2273         GR [0xA0..0xFF] -- graphic character plane 1
2274
2275    A control character set is directly designated and invoked to C0 or
2276    C1 by an escape sequence.  The most common case is that:
2277    - ISO646's  control character set is designated/invoked to C0, and
2278    - ISO6429's control character set is designated/invoked to C1,
2279    and usually these designations/invocations are omitted in encoded
2280    text.  In a 7-bit environment, only C0 can be used, and a control
2281    character for C1 is encoded by an appropriate escape sequence to
2282    fit into the environment.  All control characters for C1 are
2283    defined to have corresponding escape sequences.
2284
2285    A graphic character set is at first designated to one of four
2286    graphic registers (G0 through G3), then these graphic registers are
2287    invoked to GL or GR.  These designations and invocations can be
2288    done independently.  The most common case is that G0 is invoked to
2289    GL, G1 is invoked to GR, and ASCII is designated to G0.  Usually
2290    these invocations and designations are omitted in encoded text.
2291    In a 7-bit environment, only GL can be used.
2292
2293    When a graphic character set of CHARS94 is invoked to GL, codes
2294    0x20 and 0x7F of the GL area work as control characters SPACE and
2295    DEL respectively, and codes 0xA0 and 0xFF of the GR area should not
2296    be used.
2297
2298    There are two ways of invocation: locking-shift and single-shift.
2299    With locking-shift, the invocation lasts until the next different
2300    invocation, whereas with single-shift, the invocation affects the
2301    following character only and doesn't affect the locking-shift
2302    state.  Invocations are done by the following control characters or
2303    escape sequences:
2304
2305    ----------------------------------------------------------------------
2306    abbrev  function                  cntrl escape seq   description
2307    ----------------------------------------------------------------------
2308    SI/LS0  (shift-in)                0x0F  none         invoke G0 into GL
2309    SO/LS1  (shift-out)               0x0E  none         invoke G1 into GL
2310    LS2     (locking-shift-2)         none  ESC 'n'      invoke G2 into GL
2311    LS3     (locking-shift-3)         none  ESC 'o'      invoke G3 into GL
2312    LS1R    (locking-shift-1 right)   none  ESC '~'      invoke G1 into GR (*)
2313    LS2R    (locking-shift-2 right)   none  ESC '}'      invoke G2 into GR (*)
2314    LS3R    (locking-shift 3 right)   none  ESC '|'      invoke G3 into GR (*)
2315    SS2     (single-shift-2)          0x8E  ESC 'N'      invoke G2 for one char
2316    SS3     (single-shift-3)          0x8F  ESC 'O'      invoke G3 for one char
2317    ----------------------------------------------------------------------
2318    (*) These are not used by any known coding system.
2319
2320    Control characters for these functions are defined by macros
2321    ISO_CODE_XXX in `coding.h'.
2322
2323    Designations are done by the following escape sequences:
2324    ----------------------------------------------------------------------
2325    escape sequence      description
2326    ----------------------------------------------------------------------
2327    ESC '(' <F>          designate DIMENSION1_CHARS94<F> to G0
2328    ESC ')' <F>          designate DIMENSION1_CHARS94<F> to G1
2329    ESC '*' <F>          designate DIMENSION1_CHARS94<F> to G2
2330    ESC '+' <F>          designate DIMENSION1_CHARS94<F> to G3
2331    ESC ',' <F>          designate DIMENSION1_CHARS96<F> to G0 (*)
2332    ESC '-' <F>          designate DIMENSION1_CHARS96<F> to G1
2333    ESC '.' <F>          designate DIMENSION1_CHARS96<F> to G2
2334    ESC '/' <F>          designate DIMENSION1_CHARS96<F> to G3
2335    ESC '$' '(' <F>      designate DIMENSION2_CHARS94<F> to G0 (**)
2336    ESC '$' ')' <F>      designate DIMENSION2_CHARS94<F> to G1
2337    ESC '$' '*' <F>      designate DIMENSION2_CHARS94<F> to G2
2338    ESC '$' '+' <F>      designate DIMENSION2_CHARS94<F> to G3
2339    ESC '$' ',' <F>      designate DIMENSION2_CHARS96<F> to G0 (*)
2340    ESC '$' '-' <F>      designate DIMENSION2_CHARS96<F> to G1
2341    ESC '$' '.' <F>      designate DIMENSION2_CHARS96<F> to G2
2342    ESC '$' '/' <F>      designate DIMENSION2_CHARS96<F> to G3
2343    ----------------------------------------------------------------------
2344
2345    In this list, "DIMENSION1_CHARS94<F>" means a graphic character set
2346    of dimension 1, chars 94, and final character <F>, etc...
2347
2348    Note (*): Although these designations are not allowed in ISO2022,
2349    Emacs accepts them on decoding, and produces them on encoding
2350    CHARS96 character sets in a coding system which is characterized as
2351    7-bit environment, non-locking-shift, and non-single-shift.
2352
2353    Note (**): If <F> is '@', 'A', or 'B', the intermediate character
2354    '(' must be omitted.  We refer to this as "short-form" hereafter.
2355
2356    Now you may notice that there are a lot of ways of encoding the
2357    same multilingual text in ISO2022.  Actually, there exist many
2358    coding systems such as Compound Text (used in X11's inter client
2359    communication, ISO-2022-JP (used in Japanese Internet), ISO-2022-KR
2360    (used in Korean Internet), EUC (Extended UNIX Code, used in Asian
2361    localized platforms), and all of these are variants of ISO2022.
2362
2363    In addition to the above, Emacs handles two more kinds of escape
2364    sequences: ISO6429's direction specification and Emacs' private
2365    sequence for specifying character composition.
2366
2367    ISO6429's direction specification takes the following form:
2368         o CSI ']'      -- end of the current direction
2369         o CSI '0' ']'  -- end of the current direction
2370         o CSI '1' ']'  -- start of left-to-right text
2371         o CSI '2' ']'  -- start of right-to-left text
2372    The control character CSI (0x9B: control sequence introducer) is
2373    abbreviated to the escape sequence ESC '[' in a 7-bit environment.
2374
2375    Character composition specification takes the following form:
2376         o ESC '0' -- start relative composition
2377         o ESC '1' -- end composition
2378         o ESC '2' -- start rule-base composition (*)
2379         o ESC '3' -- start relative composition with alternate chars  (**)
2380         o ESC '4' -- start rule-base composition with alternate chars  (**)
2381   Since these are not standard escape sequences of any ISO standard,
2382   the use of them with these meanings is restricted to Emacs only.
2383
2384   (*) This form is used only in Emacs 20.7 and older versions,
2385   but newer versions can safely decode it.
2386   (**) This form is used only in Emacs 21.1 and newer versions,
2387   and older versions can't decode it.
2388
2389   Here's a list of example usages of these composition escape
2390   sequences (categorized by `enum composition_method').
2391
2392   COMPOSITION_RELATIVE:
2393         ESC 0 CHAR [ CHAR ] ESC 1
2394   COMPOSITION_WITH_RULE:
2395         ESC 2 CHAR [ RULE CHAR ] ESC 1
2396   COMPOSITION_WITH_ALTCHARS:
2397         ESC 3 ALTCHAR [ ALTCHAR ] ESC 0 CHAR [ CHAR ] ESC 1
2398   COMPOSITION_WITH_RULE_ALTCHARS:
2399         ESC 4 ALTCHAR [ RULE ALTCHAR ] ESC 0 CHAR [ CHAR ] ESC 1 */
2400
2401 enum iso_code_class_type iso_code_class[256];
2402
2403 #define SAFE_CHARSET_P(coding, id)      \
2404   ((id) <= (coding)->max_charset_id     \
2405    && (coding)->safe_charsets[id] >= 0)
2406
2407
2408 #define SHIFT_OUT_OK(category)  \
2409   (CODING_ISO_INITIAL (&coding_categories[category], 1) >= 0)
2410
2411 static void
2412 setup_iso_safe_charsets (attrs)
2413      Lisp_Object attrs;
2414 {
2415   Lisp_Object charset_list, safe_charsets;
2416   Lisp_Object request;
2417   Lisp_Object reg_usage;
2418   Lisp_Object tail;
2419   int reg94, reg96;
2420   int flags = XINT (AREF (attrs, coding_attr_iso_flags));
2421   int max_charset_id;
2422
2423   charset_list = CODING_ATTR_CHARSET_LIST (attrs);
2424   if ((flags & CODING_ISO_FLAG_FULL_SUPPORT)
2425       && ! EQ (charset_list, Viso_2022_charset_list))
2426     {
2427       CODING_ATTR_CHARSET_LIST (attrs)
2428         = charset_list = Viso_2022_charset_list;
2429       ASET (attrs, coding_attr_safe_charsets, Qnil);
2430     }
2431
2432   if (STRINGP (AREF (attrs, coding_attr_safe_charsets)))
2433     return;
2434
2435   max_charset_id = 0;
2436   for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
2437     {
2438       int id = XINT (XCAR (tail));
2439       if (max_charset_id < id)
2440         max_charset_id = id;
2441     }
2442
2443   safe_charsets = Fmake_string (make_number (max_charset_id + 1),
2444                                 make_number (255));
2445   request = AREF (attrs, coding_attr_iso_request);
2446   reg_usage = AREF (attrs, coding_attr_iso_usage);
2447   reg94 = XINT (XCAR (reg_usage));
2448   reg96 = XINT (XCDR (reg_usage));
2449
2450   for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
2451     {
2452       Lisp_Object id;
2453       Lisp_Object reg;
2454       struct charset *charset;
2455
2456       id = XCAR (tail);
2457       charset = CHARSET_FROM_ID (XINT (id));
2458       reg = Fcdr (Fassq (id, request));
2459       if (! NILP (reg))
2460         SSET (safe_charsets, XINT (id), XINT (reg));
2461       else if (charset->iso_chars_96)
2462         {
2463           if (reg96 < 4)
2464             SSET (safe_charsets, XINT (id), reg96);
2465         }
2466       else
2467         {
2468           if (reg94 < 4)
2469             SSET (safe_charsets, XINT (id), reg94);
2470         }
2471     }
2472   ASET (attrs, coding_attr_safe_charsets, safe_charsets);
2473 }
2474
2475
2476 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
2477    Check if a text is encoded in one of ISO-2022 based codig systems.
2478    If it is, return 1, else return 0.  */
2479
2480 static int
2481 detect_coding_iso_2022 (coding, detect_info)
2482      struct coding_system *coding;
2483      struct coding_detection_info *detect_info;
2484 {
2485   const unsigned char *src = coding->source, *src_base = src;
2486   const unsigned char *src_end = coding->source + coding->src_bytes;
2487   int multibytep = coding->src_multibyte;
2488   int single_shifting = 0;
2489   int id;
2490   int c, c1;
2491   int consumed_chars = 0;
2492   int i;
2493   int rejected = 0;
2494   int found = 0;
2495
2496   detect_info->checked |= CATEGORY_MASK_ISO;
2497
2498   for (i = coding_category_iso_7; i <= coding_category_iso_8_else; i++)
2499     {
2500       struct coding_system *this = &(coding_categories[i]);
2501       Lisp_Object attrs, val;
2502
2503       attrs = CODING_ID_ATTRS (this->id);
2504       if (CODING_ISO_FLAGS (this) & CODING_ISO_FLAG_FULL_SUPPORT
2505           && ! EQ (CODING_ATTR_SAFE_CHARSETS (attrs), Viso_2022_charset_list))
2506         setup_iso_safe_charsets (attrs);
2507       val = CODING_ATTR_SAFE_CHARSETS (attrs);
2508       this->max_charset_id = SCHARS (val) - 1;
2509       this->safe_charsets = (char *) SDATA (val);
2510     }
2511
2512   /* A coding system of this category is always ASCII compatible.  */
2513   src += coding->head_ascii;
2514
2515   while (rejected != CATEGORY_MASK_ISO)
2516     {
2517       src_base = src;
2518       ONE_MORE_BYTE (c);
2519       switch (c)
2520         {
2521         case ISO_CODE_ESC:
2522           if (inhibit_iso_escape_detection)
2523             break;
2524           single_shifting = 0;
2525           ONE_MORE_BYTE (c);
2526           if (c >= '(' && c <= '/')
2527             {
2528               /* Designation sequence for a charset of dimension 1.  */
2529               ONE_MORE_BYTE (c1);
2530               if (c1 < ' ' || c1 >= 0x80
2531                   || (id = iso_charset_table[0][c >= ','][c1]) < 0)
2532                 /* Invalid designation sequence.  Just ignore.  */
2533                 break;
2534             }
2535           else if (c == '$')
2536             {
2537               /* Designation sequence for a charset of dimension 2.  */
2538               ONE_MORE_BYTE (c);
2539               if (c >= '@' && c <= 'B')
2540                 /* Designation for JISX0208.1978, GB2312, or JISX0208.  */
2541                 id = iso_charset_table[1][0][c];
2542               else if (c >= '(' && c <= '/')
2543                 {
2544                   ONE_MORE_BYTE (c1);
2545                   if (c1 < ' ' || c1 >= 0x80
2546                       || (id = iso_charset_table[1][c >= ','][c1]) < 0)
2547                     /* Invalid designation sequence.  Just ignore.  */
2548                     break;
2549                 }
2550               else
2551                 /* Invalid designation sequence.  Just ignore it.  */
2552                 break;
2553             }
2554           else if (c == 'N' || c == 'O')
2555             {
2556               /* ESC <Fe> for SS2 or SS3.  */
2557               single_shifting = 1;
2558               rejected |= CATEGORY_MASK_ISO_7BIT | CATEGORY_MASK_ISO_8BIT;
2559               break;
2560             }
2561           else if (c >= '0' && c <= '4')
2562             {
2563               /* ESC <Fp> for start/end composition.  */
2564               found |= CATEGORY_MASK_ISO;
2565               break;
2566             }
2567           else
2568             {
2569               /* Invalid escape sequence.  Just ignore it.  */
2570               break;
2571             }
2572
2573           /* We found a valid designation sequence for CHARSET.  */
2574           rejected |= CATEGORY_MASK_ISO_8BIT;
2575           if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_7],
2576                               id))
2577             found |= CATEGORY_MASK_ISO_7;
2578           else
2579             rejected |= CATEGORY_MASK_ISO_7;
2580           if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_7_tight],
2581                               id))
2582             found |= CATEGORY_MASK_ISO_7_TIGHT;
2583           else
2584             rejected |= CATEGORY_MASK_ISO_7_TIGHT;
2585           if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_7_else],
2586                               id))
2587             found |= CATEGORY_MASK_ISO_7_ELSE;
2588           else
2589             rejected |= CATEGORY_MASK_ISO_7_ELSE;
2590           if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_8_else],
2591                               id))
2592             found |= CATEGORY_MASK_ISO_8_ELSE;
2593           else
2594             rejected |= CATEGORY_MASK_ISO_8_ELSE;
2595           break;
2596
2597         case ISO_CODE_SO:
2598         case ISO_CODE_SI:
2599           /* Locking shift out/in.  */
2600           if (inhibit_iso_escape_detection)
2601             break;
2602           single_shifting = 0;
2603           rejected |= CATEGORY_MASK_ISO_7BIT | CATEGORY_MASK_ISO_8BIT;
2604           found |= CATEGORY_MASK_ISO_ELSE;
2605           break;
2606
2607         case ISO_CODE_CSI:
2608           /* Control sequence introducer.  */
2609           single_shifting = 0;
2610           rejected |= CATEGORY_MASK_ISO_7BIT | CATEGORY_MASK_ISO_7_ELSE;
2611           found |= CATEGORY_MASK_ISO_8_ELSE;
2612           goto check_extra_latin;
2613
2614         case ISO_CODE_SS2:
2615         case ISO_CODE_SS3:
2616           /* Single shift.   */
2617           if (inhibit_iso_escape_detection)
2618             break;
2619           single_shifting = 0;
2620           rejected |= CATEGORY_MASK_ISO_7BIT;
2621           if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_1])
2622               & CODING_ISO_FLAG_SINGLE_SHIFT)
2623             found |= CATEGORY_MASK_ISO_8_1, single_shifting = 1;
2624           if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_2])
2625               & CODING_ISO_FLAG_SINGLE_SHIFT)
2626             found |= CATEGORY_MASK_ISO_8_2, single_shifting = 1;
2627           if (single_shifting)
2628             break;
2629           goto check_extra_latin;
2630
2631         default:
2632           if (c < 0)
2633             continue;
2634           if (c < 0x80)
2635             {
2636               single_shifting = 0;
2637               break;
2638             }
2639           if (c >= 0xA0)
2640             {
2641               rejected |= CATEGORY_MASK_ISO_7BIT | CATEGORY_MASK_ISO_7_ELSE;
2642               found |= CATEGORY_MASK_ISO_8_1;
2643               /* Check the length of succeeding codes of the range
2644                  0xA0..0FF.  If the byte length is even, we include
2645                  CATEGORY_MASK_ISO_8_2 in `found'.  We can check this
2646                  only when we are not single shifting.  */
2647               if (! single_shifting
2648                   && ! (rejected & CATEGORY_MASK_ISO_8_2))
2649                 {
2650                   int i = 1;
2651                   while (src < src_end)
2652                     {
2653                       ONE_MORE_BYTE (c);
2654                       if (c < 0xA0)
2655                         break;
2656                       i++;
2657                     }
2658
2659                   if (i & 1 && src < src_end)
2660                     rejected |= CATEGORY_MASK_ISO_8_2;
2661                   else
2662                     found |= CATEGORY_MASK_ISO_8_2;
2663                 }
2664               break;
2665             }
2666         check_extra_latin:
2667           single_shifting = 0;
2668           if (! VECTORP (Vlatin_extra_code_table)
2669               || NILP (XVECTOR (Vlatin_extra_code_table)->contents[c]))
2670             {
2671               rejected = CATEGORY_MASK_ISO;
2672               break;
2673             }
2674           if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_1])
2675               & CODING_ISO_FLAG_LATIN_EXTRA)
2676             found |= CATEGORY_MASK_ISO_8_1;
2677           else
2678             rejected |= CATEGORY_MASK_ISO_8_1;
2679           rejected |= CATEGORY_MASK_ISO_8_2;
2680         }
2681     }
2682   detect_info->rejected |= CATEGORY_MASK_ISO;
2683   return 0;
2684
2685  no_more_source:
2686   detect_info->rejected |= rejected;
2687   detect_info->found |= (found & ~rejected);
2688   return 1;
2689 }
2690
2691
2692 /* Set designation state into CODING.  */
2693 #define DECODE_DESIGNATION(reg, dim, chars_96, final)                   \
2694   do {                                                                  \
2695     int id, prev;                                                       \
2696                                                                         \
2697     if (final < '0' || final >= 128                                     \
2698         || ((id = ISO_CHARSET_TABLE (dim, chars_96, final)) < 0)        \
2699         || !SAFE_CHARSET_P (coding, id))                                \
2700       {                                                                 \
2701         CODING_ISO_DESIGNATION (coding, reg) = -2;                      \
2702         goto invalid_code;                                              \
2703       }                                                                 \
2704     prev = CODING_ISO_DESIGNATION (coding, reg);                        \
2705     if (id == charset_jisx0201_roman)                                   \
2706       {                                                                 \
2707         if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_USE_ROMAN)      \
2708           id = charset_ascii;                                           \
2709       }                                                                 \
2710     else if (id == charset_jisx0208_1978)                               \
2711       {                                                                 \
2712         if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_USE_OLDJIS)     \
2713           id = charset_jisx0208;                                        \
2714       }                                                                 \
2715     CODING_ISO_DESIGNATION (coding, reg) = id;                          \
2716     /* If there was an invalid designation to REG previously, and this  \
2717        designation is ASCII to REG, we should keep this designation     \
2718        sequence.  */                                                    \
2719     if (prev == -2 && id == charset_ascii)                              \
2720       goto invalid_code;                                                \
2721   } while (0)
2722
2723
2724 #define MAYBE_FINISH_COMPOSITION()                              \
2725   do {                                                          \
2726     int i;                                                      \
2727     if (composition_state == COMPOSING_NO)                      \
2728       break;                                                    \
2729     /* It is assured that we have enough room for producing     \
2730        characters stored in the table `components'.  */         \
2731     if (charbuf + component_idx > charbuf_end)                  \
2732       goto no_more_source;                                      \
2733     composition_state = COMPOSING_NO;                           \
2734     if (method == COMPOSITION_RELATIVE                          \
2735         || method == COMPOSITION_WITH_ALTCHARS)                 \
2736       {                                                         \
2737         for (i = 0; i < component_idx; i++)                     \
2738           *charbuf++ = components[i];                           \
2739         char_offset += component_idx;                           \
2740       }                                                         \
2741     else                                                        \
2742       {                                                         \
2743         for (i = 0; i < component_idx; i += 2)                  \
2744           *charbuf++ = components[i];                           \
2745         char_offset += (component_idx / 2) + 1;                 \
2746       }                                                         \
2747   } while (0)
2748
2749
2750 /* Handle composition start sequence ESC 0, ESC 2, ESC 3, or ESC 4.
2751    ESC 0 : relative composition : ESC 0 CHAR ... ESC 1
2752    ESC 2 : rulebase composition : ESC 2 CHAR RULE CHAR RULE ... CHAR ESC 1
2753    ESC 3 : altchar composition :  ESC 3 CHAR ... ESC 0 CHAR ... ESC 1
2754    ESC 4 : alt&rule composition : ESC 4 CHAR RULE ... CHAR ESC 0 CHAR ... ESC 1
2755   */
2756
2757 #define DECODE_COMPOSITION_START(c1)                                    \
2758   do {                                                                  \
2759     if (c1 == '0'                                                       \
2760         && composition_state == COMPOSING_COMPONENT_RULE)               \
2761       {                                                                 \
2762         component_len = component_idx;                                  \
2763         composition_state = COMPOSING_CHAR;                             \
2764       }                                                                 \
2765     else                                                                \
2766       {                                                                 \
2767         const unsigned char *p;                                         \
2768                                                                         \
2769         MAYBE_FINISH_COMPOSITION ();                                    \
2770         if (charbuf + MAX_COMPOSITION_COMPONENTS > charbuf_end)         \
2771           goto no_more_source;                                          \
2772         for (p = src; p < src_end - 1; p++)                             \
2773           if (*p == ISO_CODE_ESC && p[1] == '1')                        \
2774             break;                                                      \
2775         if (p == src_end - 1)                                           \
2776           {                                                             \
2777             if (coding->mode & CODING_MODE_LAST_BLOCK)                  \
2778               goto invalid_code;                                        \
2779             goto no_more_source;                                        \
2780           }                                                             \
2781                                                                         \
2782         /* This is surely the start of a composition.  */               \
2783         method = (c1 == '0' ? COMPOSITION_RELATIVE                      \
2784                   : c1 == '2' ? COMPOSITION_WITH_RULE                   \
2785                   : c1 == '3' ? COMPOSITION_WITH_ALTCHARS               \
2786                   : COMPOSITION_WITH_RULE_ALTCHARS);                    \
2787         composition_state = (c1 <= '2' ? COMPOSING_CHAR                 \
2788                              : COMPOSING_COMPONENT_CHAR);               \
2789         component_idx = component_len = 0;                              \
2790       }                                                                 \
2791   } while (0)
2792
2793
2794 /* Handle compositoin end sequence ESC 1.  */
2795
2796 #define DECODE_COMPOSITION_END()                                        \
2797   do {                                                                  \
2798     int nchars = (component_len > 0 ? component_idx - component_len     \
2799                   : method == COMPOSITION_RELATIVE ? component_idx      \
2800                   : (component_idx + 1) / 2);                           \
2801     int i;                                                              \
2802     int *saved_charbuf = charbuf;                                       \
2803                                                                         \
2804     ADD_COMPOSITION_DATA (charbuf, nchars, method);                     \
2805     if (method != COMPOSITION_RELATIVE)                                 \
2806       {                                                                 \
2807         if (component_len == 0)                                         \
2808           for (i = 0; i < component_idx; i++)                           \
2809             *charbuf++ = components[i];                                 \
2810         else                                                            \
2811           for (i = 0; i < component_len; i++)                           \
2812             *charbuf++ = components[i];                                 \
2813         *saved_charbuf = saved_charbuf - charbuf;                       \
2814       }                                                                 \
2815     if (method == COMPOSITION_WITH_RULE)                                \
2816       for (i = 0; i < component_idx; i += 2, char_offset++)             \
2817         *charbuf++ = components[i];                                     \
2818     else                                                                \
2819       for (i = component_len; i < component_idx; i++, char_offset++)    \
2820         *charbuf++ = components[i];                                     \
2821     coding->annotated = 1;                                              \
2822     composition_state = COMPOSING_NO;                                   \
2823   } while (0)
2824
2825
2826 /* Decode a composition rule from the byte C1 (and maybe one more byte
2827    from SRC) and store one encoded composition rule in
2828    coding->cmp_data.  */
2829
2830 #define DECODE_COMPOSITION_RULE(c1)                                     \
2831   do {                                                                  \
2832     (c1) -= 32;                                                         \
2833     if (c1 < 81)                /* old format (before ver.21) */        \
2834       {                                                                 \
2835         int gref = (c1) / 9;                                            \
2836         int nref = (c1) % 9;                                            \
2837         if (gref == 4) gref = 10;                                       \
2838         if (nref == 4) nref = 10;                                       \
2839         c1 = COMPOSITION_ENCODE_RULE (gref, nref);                      \
2840       }                                                                 \
2841     else if (c1 < 93)           /* new format (after ver.21) */         \
2842       {                                                                 \
2843         ONE_MORE_BYTE (c2);                                             \
2844         c1 = COMPOSITION_ENCODE_RULE (c1 - 81, c2 - 32);                \
2845       }                                                                 \
2846     else                                                                \
2847       c1 = 0;                                                           \
2848   } while (0)
2849
2850
2851 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".  */
2852
2853 static void
2854 decode_coding_iso_2022 (coding)
2855      struct coding_system *coding;
2856 {
2857   const unsigned char *src = coding->source + coding->consumed;
2858   const unsigned char *src_end = coding->source + coding->src_bytes;
2859   const unsigned char *src_base;
2860   int *charbuf = coding->charbuf + coding->charbuf_used;
2861   int *charbuf_end
2862     = coding->charbuf + coding->charbuf_size - 4 - MAX_ANNOTATION_LENGTH;
2863   int consumed_chars = 0, consumed_chars_base;
2864   int multibytep = coding->src_multibyte;
2865   /* Charsets invoked to graphic plane 0 and 1 respectively.  */
2866   int charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
2867   int charset_id_1 = CODING_ISO_INVOKED_CHARSET (coding, 1);
2868   struct charset *charset;
2869   int c;
2870   /* For handling composition sequence.  */
2871 #define COMPOSING_NO                    0
2872 #define COMPOSING_CHAR                  1
2873 #define COMPOSING_RULE                  2
2874 #define COMPOSING_COMPONENT_CHAR        3
2875 #define COMPOSING_COMPONENT_RULE        4
2876
2877   int composition_state = COMPOSING_NO;
2878   enum composition_method method;
2879   int components[MAX_COMPOSITION_COMPONENTS * 2 + 1];
2880   int component_idx;
2881   int component_len;
2882   Lisp_Object attrs, charset_list;
2883   int char_offset = coding->produced_char;
2884   int last_offset = char_offset;
2885   int last_id = charset_ascii;
2886
2887   CODING_GET_INFO (coding, attrs, charset_list);
2888   setup_iso_safe_charsets (attrs);
2889
2890   while (1)
2891     {
2892       int c1, c2;
2893
2894       src_base = src;
2895       consumed_chars_base = consumed_chars;
2896
2897       if (charbuf >= charbuf_end)
2898         break;
2899
2900       ONE_MORE_BYTE (c1);
2901       if (c1 < 0)
2902         goto invalid_code;
2903
2904       /* We produce at most one character.  */
2905       switch (iso_code_class [c1])
2906         {
2907         case ISO_0x20_or_0x7F:
2908           if (composition_state != COMPOSING_NO)
2909             {
2910               if (composition_state == COMPOSING_RULE
2911                   || composition_state == COMPOSING_COMPONENT_RULE)
2912                 {
2913                   DECODE_COMPOSITION_RULE (c1);
2914                   components[component_idx++] = c1;
2915                   composition_state--;
2916                   continue;
2917                 }
2918             }
2919           if (charset_id_0 < 0
2920               || ! CHARSET_ISO_CHARS_96 (CHARSET_FROM_ID (charset_id_0)))
2921             /* This is SPACE or DEL.  */
2922             charset = CHARSET_FROM_ID (charset_ascii);
2923           else
2924             charset = CHARSET_FROM_ID (charset_id_0);
2925           break;
2926
2927         case ISO_graphic_plane_0:
2928           if (composition_state != COMPOSING_NO)
2929             {
2930               if (composition_state == COMPOSING_RULE
2931                   || composition_state == COMPOSING_COMPONENT_RULE)
2932                 {
2933                   DECODE_COMPOSITION_RULE (c1);
2934                   components[component_idx++] = c1;
2935                   composition_state--;
2936                   continue;
2937                 }
2938             }
2939           charset = CHARSET_FROM_ID (charset_id_0);
2940           break;
2941
2942         case ISO_0xA0_or_0xFF:
2943           if (charset_id_1 < 0
2944               || ! CHARSET_ISO_CHARS_96 (CHARSET_FROM_ID (charset_id_1))
2945               || CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS)
2946             goto invalid_code;
2947           /* This is a graphic character, we fall down ... */
2948
2949         case ISO_graphic_plane_1:
2950           if (charset_id_1 < 0)
2951             goto invalid_code;
2952           charset = CHARSET_FROM_ID (charset_id_1);
2953           break;
2954
2955         case ISO_control_0:
2956           MAYBE_FINISH_COMPOSITION ();
2957           charset = CHARSET_FROM_ID (charset_ascii);
2958           break;
2959
2960         case ISO_control_1:
2961           MAYBE_FINISH_COMPOSITION ();
2962           goto invalid_code;
2963
2964         case ISO_shift_out:
2965           if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LOCKING_SHIFT)
2966               || CODING_ISO_DESIGNATION (coding, 1) < 0)
2967             goto invalid_code;
2968           CODING_ISO_INVOCATION (coding, 0) = 1;
2969           charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
2970           continue;
2971
2972         case ISO_shift_in:
2973           if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LOCKING_SHIFT))
2974             goto invalid_code;
2975           CODING_ISO_INVOCATION (coding, 0) = 0;
2976           charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
2977           continue;
2978
2979         case ISO_single_shift_2_7:
2980         case ISO_single_shift_2:
2981           if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT))
2982             goto invalid_code;
2983           /* SS2 is handled as an escape sequence of ESC 'N' */
2984           c1 = 'N';
2985           goto label_escape_sequence;
2986
2987         case ISO_single_shift_3:
2988           if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT))
2989             goto invalid_code;
2990           /* SS2 is handled as an escape sequence of ESC 'O' */
2991           c1 = 'O';
2992           goto label_escape_sequence;
2993
2994         case ISO_control_sequence_introducer:
2995           /* CSI is handled as an escape sequence of ESC '[' ...  */
2996           c1 = '[';
2997           goto label_escape_sequence;
2998
2999         case ISO_escape:
3000           ONE_MORE_BYTE (c1);
3001         label_escape_sequence:
3002           /* Escape sequences handled here are invocation,
3003              designation, direction specification, and character
3004              composition specification.  */
3005           switch (c1)
3006             {
3007             case '&':           /* revision of following character set */
3008               ONE_MORE_BYTE (c1);
3009               if (!(c1 >= '@' && c1 <= '~'))
3010                 goto invalid_code;
3011               ONE_MORE_BYTE (c1);
3012               if (c1 != ISO_CODE_ESC)
3013                 goto invalid_code;
3014               ONE_MORE_BYTE (c1);
3015               goto label_escape_sequence;
3016
3017             case '$':           /* designation of 2-byte character set */
3018               if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DESIGNATION))
3019                 goto invalid_code;
3020               ONE_MORE_BYTE (c1);
3021               if (c1 >= '@' && c1 <= 'B')
3022                 {       /* designation of JISX0208.1978, GB2312.1980,
3023                            or JISX0208.1980 */
3024                   DECODE_DESIGNATION (0, 2, 0, c1);
3025                 }
3026               else if (c1 >= 0x28 && c1 <= 0x2B)
3027                 {       /* designation of DIMENSION2_CHARS94 character set */
3028                   ONE_MORE_BYTE (c2);
3029                   DECODE_DESIGNATION (c1 - 0x28, 2, 0, c2);
3030                 }
3031               else if (c1 >= 0x2C && c1 <= 0x2F)
3032                 {       /* designation of DIMENSION2_CHARS96 character set */
3033                   ONE_MORE_BYTE (c2);
3034                   DECODE_DESIGNATION (c1 - 0x2C, 2, 1, c2);
3035                 }
3036               else
3037                 goto invalid_code;
3038               /* We must update these variables now.  */
3039               charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
3040               charset_id_1 = CODING_ISO_INVOKED_CHARSET (coding, 1);
3041               continue;
3042
3043             case 'n':           /* invocation of locking-shift-2 */
3044               if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LOCKING_SHIFT)
3045                   || CODING_ISO_DESIGNATION (coding, 2) < 0)
3046                 goto invalid_code;
3047               CODING_ISO_INVOCATION (coding, 0) = 2;
3048               charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
3049               continue;
3050
3051             case 'o':           /* invocation of locking-shift-3 */
3052               if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LOCKING_SHIFT)
3053                   || CODING_ISO_DESIGNATION (coding, 3) < 0)
3054                 goto invalid_code;
3055               CODING_ISO_INVOCATION (coding, 0) = 3;
3056               charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
3057               continue;
3058
3059             case 'N':           /* invocation of single-shift-2 */
3060               if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT)
3061                   || CODING_ISO_DESIGNATION (coding, 2) < 0)
3062                 goto invalid_code;
3063               charset = CHARSET_FROM_ID (CODING_ISO_DESIGNATION (coding, 2));
3064               ONE_MORE_BYTE (c1);
3065               if (c1 < 0x20 || (c1 >= 0x80 && c1 < 0xA0))
3066                 goto invalid_code;
3067               break;
3068
3069             case 'O':           /* invocation of single-shift-3 */
3070               if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT)
3071                   || CODING_ISO_DESIGNATION (coding, 3) < 0)
3072                 goto invalid_code;
3073               charset = CHARSET_FROM_ID (CODING_ISO_DESIGNATION (coding, 3));
3074               ONE_MORE_BYTE (c1);
3075               if (c1 < 0x20 || (c1 >= 0x80 && c1 < 0xA0))
3076                 goto invalid_code;
3077               break;
3078
3079             case '0': case '2': case '3': case '4': /* start composition */
3080               if (! (coding->common_flags & CODING_ANNOTATE_COMPOSITION_MASK))
3081                 goto invalid_code;
3082               DECODE_COMPOSITION_START (c1);
3083               continue;
3084
3085             case '1':           /* end composition */
3086               if (composition_state == COMPOSING_NO)
3087                 goto invalid_code;
3088               DECODE_COMPOSITION_END ();
3089               continue;
3090
3091             case '[':           /* specification of direction */
3092               if (! CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DIRECTION)
3093                 goto invalid_code;
3094               /* For the moment, nested direction is not supported.
3095                  So, `coding->mode & CODING_MODE_DIRECTION' zero means
3096                  left-to-right, and nozero means right-to-left.  */
3097               ONE_MORE_BYTE (c1);
3098               switch (c1)
3099                 {
3100                 case ']':       /* end of the current direction */
3101                   coding->mode &= ~CODING_MODE_DIRECTION;
3102
3103                 case '0':       /* end of the current direction */
3104                 case '1':       /* start of left-to-right direction */
3105                   ONE_MORE_BYTE (c1);
3106                   if (c1 == ']')
3107                     coding->mode &= ~CODING_MODE_DIRECTION;
3108                   else
3109                     goto invalid_code;
3110                   break;
3111
3112                 case '2':       /* start of right-to-left direction */
3113                   ONE_MORE_BYTE (c1);
3114                   if (c1 == ']')
3115                     coding->mode |= CODING_MODE_DIRECTION;
3116                   else
3117                     goto invalid_code;
3118                   break;
3119
3120                 default:
3121                   goto invalid_code;
3122                 }
3123               continue;
3124
3125             case '%':
3126               ONE_MORE_BYTE (c1);
3127               if (c1 == '/')
3128                 {
3129                   /* CTEXT extended segment:
3130                      ESC % / [0-4] M L --ENCODING-NAME-- \002 --BYTES--
3131                      We keep these bytes as is for the moment.
3132                      They may be decoded by post-read-conversion.  */
3133                   int dim, M, L;
3134                   int size;
3135
3136                   ONE_MORE_BYTE (dim);
3137                   ONE_MORE_BYTE (M);
3138                   ONE_MORE_BYTE (L);
3139                   size = ((M - 128) * 128) + (L - 128);
3140                   if (charbuf + 8 + size > charbuf_end)
3141                     goto break_loop;
3142                   *charbuf++ = ISO_CODE_ESC;
3143                   *charbuf++ = '%';
3144                   *charbuf++ = '/';
3145                   *charbuf++ = dim;
3146                   *charbuf++ = BYTE8_TO_CHAR (M);
3147                   *charbuf++ = BYTE8_TO_CHAR (L);
3148                   while (size-- > 0)
3149                     {
3150                       ONE_MORE_BYTE (c1);
3151                       *charbuf++ = ASCII_BYTE_P (c1) ? c1 : BYTE8_TO_CHAR (c1);
3152                     }
3153                 }
3154               else if (c1 == 'G')
3155                 {
3156                   /* XFree86 extension for embedding UTF-8 in CTEXT:
3157                      ESC % G --UTF-8-BYTES-- ESC % @
3158                      We keep these bytes as is for the moment.
3159                      They may be decoded by post-read-conversion.  */
3160                   int *p = charbuf;
3161
3162                   if (p + 6 > charbuf_end)
3163                     goto break_loop;
3164                   *p++ = ISO_CODE_ESC;
3165                   *p++ = '%';
3166                   *p++ = 'G';
3167                   while (p < charbuf_end)
3168                     {
3169                       ONE_MORE_BYTE (c1);
3170                       if (c1 == ISO_CODE_ESC
3171                           && src + 1 < src_end
3172                           && src[0] == '%'
3173                           && src[1] == '@')
3174                         break;
3175                       *p++ = ASCII_BYTE_P (c1) ? c1 : BYTE8_TO_CHAR (c1);
3176                     }
3177                   if (p + 3 > charbuf_end)
3178                     goto break_loop;
3179                   *p++ = ISO_CODE_ESC;
3180                   *p++ = '%';
3181                   *p++ = '@';
3182                   charbuf = p;
3183                 }
3184               else
3185                 goto invalid_code;
3186               continue;
3187               break;
3188
3189             default:
3190               if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DESIGNATION))
3191                 goto invalid_code;
3192               if (c1 >= 0x28 && c1 <= 0x2B)
3193                 {       /* designation of DIMENSION1_CHARS94 character set */
3194                   ONE_MORE_BYTE (c2);
3195                   DECODE_DESIGNATION (c1 - 0x28, 1, 0, c2);
3196                 }
3197               else if (c1 >= 0x2C && c1 <= 0x2F)
3198                 {       /* designation of DIMENSION1_CHARS96 character set */
3199                   ONE_MORE_BYTE (c2);
3200                   DECODE_DESIGNATION (c1 - 0x2C, 1, 1, c2);
3201                 }
3202               else
3203                 goto invalid_code;
3204               /* We must update these variables now.  */
3205               charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
3206               charset_id_1 = CODING_ISO_INVOKED_CHARSET (coding, 1);
3207               continue;
3208             }
3209         }
3210
3211       if (charset->id != charset_ascii
3212           && last_id != charset->id)
3213         {
3214           if (last_id != charset_ascii)
3215             ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
3216           last_id = charset->id;
3217           last_offset = char_offset;
3218         }
3219
3220       /* Now we know CHARSET and 1st position code C1 of a character.
3221          Produce a decoded character while getting 2nd position code
3222          C2 if necessary.  */
3223       c1 &= 0x7F;
3224       if (CHARSET_DIMENSION (charset) > 1)
3225         {
3226           ONE_MORE_BYTE (c2);
3227           if (c2 < 0x20 || (c2 >= 0x80 && c2 < 0xA0))
3228             /* C2 is not in a valid range.  */
3229             goto invalid_code;
3230           c1 = (c1 << 8) | (c2 & 0x7F);
3231           if (CHARSET_DIMENSION (charset) > 2)
3232             {
3233               ONE_MORE_BYTE (c2);
3234               if (c2 < 0x20 || (c2 >= 0x80 && c2 < 0xA0))
3235                 /* C2 is not in a valid range.  */
3236                 goto invalid_code;
3237               c1 = (c1 << 8) | (c2 & 0x7F);
3238             }
3239         }
3240
3241       CODING_DECODE_CHAR (coding, src, src_base, src_end, charset, c1, c);
3242       if (c < 0)
3243         {
3244           MAYBE_FINISH_COMPOSITION ();
3245           for (; src_base < src; src_base++, char_offset++)
3246             {
3247               if (ASCII_BYTE_P (*src_base))
3248                 *charbuf++ = *src_base;
3249               else
3250                 *charbuf++ = BYTE8_TO_CHAR (*src_base);
3251             }
3252         }
3253       else if (composition_state == COMPOSING_NO)
3254         {
3255           *charbuf++ = c;
3256           char_offset++;
3257         }
3258       else
3259         {
3260           components[component_idx++] = c;
3261           if (method == COMPOSITION_WITH_RULE
3262               || (method == COMPOSITION_WITH_RULE_ALTCHARS
3263                   && composition_state == COMPOSING_COMPONENT_CHAR))
3264             composition_state++;
3265         }
3266       continue;
3267
3268     invalid_code:
3269       MAYBE_FINISH_COMPOSITION ();
3270       src = src_base;
3271       consumed_chars = consumed_chars_base;
3272       ONE_MORE_BYTE (c);
3273       *charbuf++ = c < 0 ? -c : ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
3274       char_offset++;
3275       coding->errors++;
3276       continue;
3277
3278     break_loop:
3279       break;
3280     }
3281
3282  no_more_source:
3283   if (last_id != charset_ascii)
3284     ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
3285   coding->consumed_char += consumed_chars_base;
3286   coding->consumed = src_base - coding->source;
3287   coding->charbuf_used = charbuf - coding->charbuf;
3288 }
3289
3290
3291 /* ISO2022 encoding stuff.  */
3292
3293 /*
3294    It is not enough to say just "ISO2022" on encoding, we have to
3295    specify more details.  In Emacs, each coding system of ISO2022
3296    variant has the following specifications:
3297         1. Initial designation to G0 thru G3.
3298         2. Allows short-form designation?
3299         3. ASCII should be designated to G0 before control characters?
3300         4. ASCII should be designated to G0 at end of line?
3301         5. 7-bit environment or 8-bit environment?
3302         6. Use locking-shift?
3303         7. Use Single-shift?
3304    And the following two are only for Japanese:
3305         8. Use ASCII in place of JIS0201-1976-Roman?
3306         9. Use JISX0208-1983 in place of JISX0208-1978?
3307    These specifications are encoded in CODING_ISO_FLAGS (coding) as flag bits
3308    defined by macros CODING_ISO_FLAG_XXX.  See `coding.h' for more
3309    details.
3310 */
3311
3312 /* Produce codes (escape sequence) for designating CHARSET to graphic
3313    register REG at DST, and increment DST.  If <final-char> of CHARSET is
3314    '@', 'A', or 'B' and the coding system CODING allows, produce
3315    designation sequence of short-form.  */
3316
3317 #define ENCODE_DESIGNATION(charset, reg, coding)                        \
3318   do {                                                                  \
3319     unsigned char final_char = CHARSET_ISO_FINAL (charset);             \
3320     char *intermediate_char_94 = "()*+";                                \
3321     char *intermediate_char_96 = ",-./";                                \
3322     int revision = -1;                                                  \
3323     int c;                                                              \
3324                                                                         \
3325     if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_REVISION)           \
3326       revision = CHARSET_ISO_REVISION (charset);                        \
3327                                                                         \
3328     if (revision >= 0)                                                  \
3329       {                                                                 \
3330         EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, '&');                       \
3331         EMIT_ONE_BYTE ('@' + revision);                                 \
3332       }                                                                 \
3333     EMIT_ONE_ASCII_BYTE (ISO_CODE_ESC);                                 \
3334     if (CHARSET_DIMENSION (charset) == 1)                               \
3335       {                                                                 \
3336         if (! CHARSET_ISO_CHARS_96 (charset))                           \
3337           c = intermediate_char_94[reg];                                \
3338         else                                                            \
3339           c = intermediate_char_96[reg];                                \
3340         EMIT_ONE_ASCII_BYTE (c);                                        \
3341       }                                                                 \
3342     else                                                                \
3343       {                                                                 \
3344         EMIT_ONE_ASCII_BYTE ('$');                                      \
3345         if (! CHARSET_ISO_CHARS_96 (charset))                           \
3346           {                                                             \
3347             if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LONG_FORM   \
3348                 || reg != 0                                             \
3349                 || final_char < '@' || final_char > 'B')                \
3350               EMIT_ONE_ASCII_BYTE (intermediate_char_94[reg]);          \
3351           }                                                             \
3352         else                                                            \
3353           EMIT_ONE_ASCII_BYTE (intermediate_char_96[reg]);              \
3354       }                                                                 \
3355     EMIT_ONE_ASCII_BYTE (final_char);                                   \
3356                                                                         \
3357     CODING_ISO_DESIGNATION (coding, reg) = CHARSET_ID (charset);        \
3358   } while (0)
3359
3360
3361 /* The following two macros produce codes (control character or escape
3362    sequence) for ISO2022 single-shift functions (single-shift-2 and
3363    single-shift-3).  */
3364
3365 #define ENCODE_SINGLE_SHIFT_2                                           \
3366   do {                                                                  \
3367     if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS)         \
3368       EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'N');                         \
3369     else                                                                \
3370       EMIT_ONE_BYTE (ISO_CODE_SS2);                                     \
3371     CODING_ISO_SINGLE_SHIFTING (coding) = 1;                            \
3372   } while (0)
3373
3374
3375 #define ENCODE_SINGLE_SHIFT_3                                           \
3376   do {                                                                  \
3377     if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS)         \
3378       EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'O');                         \
3379     else                                                                \
3380       EMIT_ONE_BYTE (ISO_CODE_SS3);                                     \
3381     CODING_ISO_SINGLE_SHIFTING (coding) = 1;                            \
3382   } while (0)
3383
3384
3385 /* The following four macros produce codes (control character or
3386    escape sequence) for ISO2022 locking-shift functions (shift-in,
3387    shift-out, locking-shift-2, and locking-shift-3).  */
3388
3389 #define ENCODE_SHIFT_IN                                 \
3390   do {                                                  \
3391     EMIT_ONE_ASCII_BYTE (ISO_CODE_SI);                  \
3392     CODING_ISO_INVOCATION (coding, 0) = 0;              \
3393   } while (0)
3394
3395
3396 #define ENCODE_SHIFT_OUT                                \
3397   do {                                                  \
3398     EMIT_ONE_ASCII_BYTE (ISO_CODE_SO);                  \
3399     CODING_ISO_INVOCATION (coding, 0) = 1;              \
3400   } while (0)
3401
3402
3403 #define ENCODE_LOCKING_SHIFT_2                          \
3404   do {                                                  \
3405     EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'n');           \
3406     CODING_ISO_INVOCATION (coding, 0) = 2;              \
3407   } while (0)
3408
3409
3410 #define ENCODE_LOCKING_SHIFT_3                          \
3411   do {                                                  \
3412     EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'n');           \
3413     CODING_ISO_INVOCATION (coding, 0) = 3;              \
3414   } while (0)
3415
3416
3417 /* Produce codes for a DIMENSION1 character whose character set is
3418    CHARSET and whose position-code is C1.  Designation and invocation
3419    sequences are also produced in advance if necessary.  */
3420
3421 #define ENCODE_ISO_CHARACTER_DIMENSION1(charset, c1)                    \
3422   do {                                                                  \
3423     int id = CHARSET_ID (charset);                                      \
3424                                                                         \
3425     if ((CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_USE_ROMAN)         \
3426         && id == charset_ascii)                                         \
3427       {                                                                 \
3428         id = charset_jisx0201_roman;                                    \
3429         charset = CHARSET_FROM_ID (id);                                 \
3430       }                                                                 \
3431                                                                         \
3432     if (CODING_ISO_SINGLE_SHIFTING (coding))                            \
3433       {                                                                 \
3434         if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS)     \
3435           EMIT_ONE_ASCII_BYTE (c1 & 0x7F);                              \
3436         else                                                            \
3437           EMIT_ONE_BYTE (c1 | 0x80);                                    \
3438         CODING_ISO_SINGLE_SHIFTING (coding) = 0;                        \
3439         break;                                                          \
3440       }                                                                 \
3441     else if (id == CODING_ISO_INVOKED_CHARSET (coding, 0))              \
3442       {                                                                 \
3443         EMIT_ONE_ASCII_BYTE (c1 & 0x7F);                                \
3444         break;                                                          \
3445       }                                                                 \
3446     else if (id == CODING_ISO_INVOKED_CHARSET (coding, 1))              \
3447       {                                                                 \
3448         EMIT_ONE_BYTE (c1 | 0x80);                                      \
3449         break;                                                          \
3450       }                                                                 \
3451     else                                                                \
3452       /* Since CHARSET is not yet invoked to any graphic planes, we     \
3453          must invoke it, or, at first, designate it to some graphic     \
3454          register.  Then repeat the loop to actually produce the        \
3455          character.  */                                                 \
3456       dst = encode_invocation_designation (charset, coding, dst,        \
3457                                            &produced_chars);            \
3458   } while (1)
3459
3460
3461 /* Produce codes for a DIMENSION2 character whose character set is
3462    CHARSET and whose position-codes are C1 and C2.  Designation and
3463    invocation codes are also produced in advance if necessary.  */
3464
3465 #define ENCODE_ISO_CHARACTER_DIMENSION2(charset, c1, c2)                \
3466   do {                                                                  \
3467     int id = CHARSET_ID (charset);                                      \
3468                                                                         \
3469     if ((CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_USE_OLDJIS)        \
3470         && id == charset_jisx0208)                                      \
3471       {                                                                 \
3472         id = charset_jisx0208_1978;                                     \
3473         charset = CHARSET_FROM_ID (id);                                 \
3474       }                                                                 \
3475                                                                         \
3476     if (CODING_ISO_SINGLE_SHIFTING (coding))                            \
3477       {                                                                 \
3478         if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS)     \
3479           EMIT_TWO_ASCII_BYTES ((c1) & 0x7F, (c2) & 0x7F);              \
3480         else                                                            \
3481           EMIT_TWO_BYTES ((c1) | 0x80, (c2) | 0x80);                    \
3482         CODING_ISO_SINGLE_SHIFTING (coding) = 0;                        \
3483         break;                                                          \
3484       }                                                                 \
3485     else if (id == CODING_ISO_INVOKED_CHARSET (coding, 0))              \
3486       {                                                                 \
3487         EMIT_TWO_ASCII_BYTES ((c1) & 0x7F, (c2) & 0x7F);                \
3488         break;                                                          \
3489       }                                                                 \
3490     else if (id == CODING_ISO_INVOKED_CHARSET (coding, 1))              \
3491       {                                                                 \
3492         EMIT_TWO_BYTES ((c1) | 0x80, (c2) | 0x80);                      \
3493         break;                                                          \
3494       }                                                                 \
3495     else                                                                \
3496       /* Since CHARSET is not yet invoked to any graphic planes, we     \
3497          must invoke it, or, at first, designate it to some graphic     \
3498          register.  Then repeat the loop to actually produce the        \
3499          character.  */                                                 \
3500       dst = encode_invocation_designation (charset, coding, dst,        \
3501                                            &produced_chars);            \
3502   } while (1)
3503
3504
3505 #define ENCODE_ISO_CHARACTER(charset, c)                                   \
3506   do {                                                                     \
3507     int code = ENCODE_CHAR ((charset),(c));                                \
3508                                                                            \
3509     if (CHARSET_DIMENSION (charset) == 1)                                  \
3510       ENCODE_ISO_CHARACTER_DIMENSION1 ((charset), code);                   \
3511     else                                                                   \
3512       ENCODE_ISO_CHARACTER_DIMENSION2 ((charset), code >> 8, code & 0xFF); \
3513   } while (0)
3514
3515
3516 /* Produce designation and invocation codes at a place pointed by DST
3517    to use CHARSET.  The element `spec.iso_2022' of *CODING is updated.
3518    Return new DST.  */
3519
3520 unsigned char *
3521 encode_invocation_designation (charset, coding, dst, p_nchars)
3522      struct charset *charset;
3523      struct coding_system *coding;
3524      unsigned char *dst;
3525      int *p_nchars;
3526 {
3527   int multibytep = coding->dst_multibyte;
3528   int produced_chars = *p_nchars;
3529   int reg;                      /* graphic register number */
3530   int id = CHARSET_ID (charset);
3531
3532   /* At first, check designations.  */
3533   for (reg = 0; reg < 4; reg++)
3534     if (id == CODING_ISO_DESIGNATION (coding, reg))
3535       break;
3536
3537   if (reg >= 4)
3538     {
3539       /* CHARSET is not yet designated to any graphic registers.  */
3540       /* At first check the requested designation.  */
3541       reg = CODING_ISO_REQUEST (coding, id);
3542       if (reg < 0)
3543         /* Since CHARSET requests no special designation, designate it
3544            to graphic register 0.  */
3545         reg = 0;
3546
3547       ENCODE_DESIGNATION (charset, reg, coding);
3548     }
3549
3550   if (CODING_ISO_INVOCATION (coding, 0) != reg
3551       && CODING_ISO_INVOCATION (coding, 1) != reg)
3552     {
3553       /* Since the graphic register REG is not invoked to any graphic
3554          planes, invoke it to graphic plane 0.  */
3555       switch (reg)
3556         {
3557         case 0:                 /* graphic register 0 */
3558           ENCODE_SHIFT_IN;
3559           break;
3560
3561         case 1:                 /* graphic register 1 */
3562           ENCODE_SHIFT_OUT;
3563           break;
3564
3565         case 2:                 /* graphic register 2 */
3566           if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT)
3567             ENCODE_SINGLE_SHIFT_2;
3568           else
3569             ENCODE_LOCKING_SHIFT_2;
3570           break;
3571
3572         case 3:                 /* graphic register 3 */
3573           if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT)
3574             ENCODE_SINGLE_SHIFT_3;
3575           else
3576             ENCODE_LOCKING_SHIFT_3;
3577           break;
3578         }
3579     }
3580
3581   *p_nchars = produced_chars;
3582   return dst;
3583 }
3584
3585 /* The following three macros produce codes for indicating direction
3586    of text.  */
3587 #define ENCODE_CONTROL_SEQUENCE_INTRODUCER                              \
3588   do {                                                                  \
3589     if (CODING_ISO_FLAGS (coding) == CODING_ISO_FLAG_SEVEN_BITS)        \
3590       EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, '[');                         \
3591     else                                                                \
3592       EMIT_ONE_BYTE (ISO_CODE_CSI);                                     \
3593   } while (0)
3594
3595
3596 #define ENCODE_DIRECTION_R2L()                  \
3597   do {                                          \
3598     ENCODE_CONTROL_SEQUENCE_INTRODUCER (dst);   \
3599     EMIT_TWO_ASCII_BYTES ('2', ']');            \
3600   } while (0)
3601
3602
3603 #define ENCODE_DIRECTION_L2R()                  \
3604   do {                                          \
3605     ENCODE_CONTROL_SEQUENCE_INTRODUCER (dst);   \
3606     EMIT_TWO_ASCII_BYTES ('0', ']');            \
3607   } while (0)
3608
3609
3610 /* Produce codes for designation and invocation to reset the graphic
3611    planes and registers to initial state.  */
3612 #define ENCODE_RESET_PLANE_AND_REGISTER()                               \
3613   do {                                                                  \
3614     int reg;                                                            \
3615     struct charset *charset;                                            \
3616                                                                         \
3617     if (CODING_ISO_INVOCATION (coding, 0) != 0)                         \
3618       ENCODE_SHIFT_IN;                                                  \
3619     for (reg = 0; reg < 4; reg++)                                       \
3620       if (CODING_ISO_INITIAL (coding, reg) >= 0                         \
3621           && (CODING_ISO_DESIGNATION (coding, reg)                      \
3622               != CODING_ISO_INITIAL (coding, reg)))                     \
3623         {                                                               \
3624           charset = CHARSET_FROM_ID (CODING_ISO_INITIAL (coding, reg)); \
3625           ENCODE_DESIGNATION (charset, reg, coding);                    \
3626         }                                                               \
3627   } while (0)
3628
3629
3630 /* Produce designation sequences of charsets in the line started from
3631    SRC to a place pointed by DST, and return updated DST.
3632
3633    If the current block ends before any end-of-line, we may fail to
3634    find all the necessary designations.  */
3635
3636 static unsigned char *
3637 encode_designation_at_bol (coding, charbuf, charbuf_end, dst)
3638      struct coding_system *coding;
3639      int *charbuf, *charbuf_end;
3640      unsigned char *dst;
3641 {
3642   struct charset *charset;
3643   /* Table of charsets to be designated to each graphic register.  */
3644   int r[4];
3645   int c, found = 0, reg;
3646   int produced_chars = 0;
3647   int multibytep = coding->dst_multibyte;
3648   Lisp_Object attrs;
3649   Lisp_Object charset_list;
3650
3651   attrs = CODING_ID_ATTRS (coding->id);
3652   charset_list = CODING_ATTR_CHARSET_LIST (attrs);
3653   if (EQ (charset_list, Qiso_2022))
3654     charset_list = Viso_2022_charset_list;
3655
3656   for (reg = 0; reg < 4; reg++)
3657     r[reg] = -1;
3658
3659   while (found < 4)
3660     {
3661       int id;
3662
3663       c = *charbuf++;
3664       if (c == '\n')
3665         break;
3666       charset = char_charset (c, charset_list, NULL);
3667       id = CHARSET_ID (charset);
3668       reg = CODING_ISO_REQUEST (coding, id);
3669       if (reg >= 0 && r[reg] < 0)
3670         {
3671           found++;
3672           r[reg] = id;
3673         }
3674     }
3675
3676   if (found)
3677     {
3678       for (reg = 0; reg < 4; reg++)
3679         if (r[reg] >= 0
3680             && CODING_ISO_DESIGNATION (coding, reg) != r[reg])
3681           ENCODE_DESIGNATION (CHARSET_FROM_ID (r[reg]), reg, coding);
3682     }
3683
3684   return dst;
3685 }
3686
3687 /* See the above "GENERAL NOTES on `encode_coding_XXX ()' functions".  */
3688
3689 static int
3690 encode_coding_iso_2022 (coding)
3691      struct coding_system *coding;
3692 {
3693   int multibytep = coding->dst_multibyte;
3694   int *charbuf = coding->charbuf;
3695   int *charbuf_end = charbuf + coding->charbuf_used;
3696   unsigned char *dst = coding->destination + coding->produced;
3697   unsigned char *dst_end = coding->destination + coding->dst_bytes;
3698   int safe_room = 16;
3699   int bol_designation
3700     = (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DESIGNATE_AT_BOL
3701        && CODING_ISO_BOL (coding));
3702   int produced_chars = 0;
3703   Lisp_Object attrs, eol_type, charset_list;
3704   int ascii_compatible;
3705   int c;
3706   int preferred_charset_id = -1;
3707
3708   CODING_GET_INFO (coding, attrs, charset_list);
3709   eol_type = CODING_ID_EOL_TYPE (coding->id);
3710   if (VECTORP (eol_type))
3711     eol_type = Qunix;
3712
3713   setup_iso_safe_charsets (attrs);
3714   /* Charset list may have been changed.  */
3715   charset_list = CODING_ATTR_CHARSET_LIST (attrs);              \
3716   coding->safe_charsets = (char *) SDATA (CODING_ATTR_SAFE_CHARSETS(attrs));
3717
3718   ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
3719
3720   while (charbuf < charbuf_end)
3721     {
3722       ASSURE_DESTINATION (safe_room);
3723
3724       if (bol_designation)
3725         {
3726           unsigned char *dst_prev = dst;
3727
3728           /* We have to produce designation sequences if any now.  */
3729           dst = encode_designation_at_bol (coding, charbuf, charbuf_end, dst);
3730           bol_designation = 0;
3731           /* We are sure that designation sequences are all ASCII bytes.  */
3732           produced_chars += dst - dst_prev;
3733         }
3734
3735       c = *charbuf++;
3736
3737       if (c < 0)
3738         {
3739           /* Handle an annotation.  */
3740           switch (*charbuf)
3741             {
3742             case CODING_ANNOTATE_COMPOSITION_MASK:
3743               /* Not yet implemented.  */
3744               break;
3745             case CODING_ANNOTATE_CHARSET_MASK:
3746               preferred_charset_id = charbuf[3];
3747               if (preferred_charset_id >= 0
3748                   && NILP (Fmemq (make_number (preferred_charset_id),
3749                                   charset_list)))
3750                 preferred_charset_id = -1;
3751               break;
3752             default:
3753               abort ();
3754             }
3755           charbuf += -c - 1;
3756           continue;
3757         }
3758
3759       /* Now encode the character C.  */
3760       if (c < 0x20 || c == 0x7F)
3761         {
3762           if (c == '\n'
3763               || (c == '\r' && EQ (eol_type, Qmac)))
3764             {
3765               if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_RESET_AT_EOL)
3766                 ENCODE_RESET_PLANE_AND_REGISTER ();
3767               if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_INIT_AT_BOL)
3768                 {
3769                   int i;
3770
3771                   for (i = 0; i < 4; i++)
3772                     CODING_ISO_DESIGNATION (coding, i)
3773                       = CODING_ISO_INITIAL (coding, i);
3774                 }
3775               bol_designation
3776                 = CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DESIGNATE_AT_BOL;
3777             }
3778           else if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_RESET_AT_CNTL)
3779             ENCODE_RESET_PLANE_AND_REGISTER ();
3780           EMIT_ONE_ASCII_BYTE (c);
3781         }
3782       else if (ASCII_CHAR_P (c))
3783         {
3784           if (ascii_compatible)
3785             EMIT_ONE_ASCII_BYTE (c);
3786           else
3787             {
3788               struct charset *charset = CHARSET_FROM_ID (charset_ascii);
3789               ENCODE_ISO_CHARACTER (charset, c);
3790             }
3791         }
3792       else if (CHAR_BYTE8_P (c))
3793         {
3794           c = CHAR_TO_BYTE8 (c);
3795           EMIT_ONE_BYTE (c);
3796         }
3797       else
3798         {
3799           struct charset *charset;
3800
3801           if (preferred_charset_id >= 0)
3802             {
3803               charset = CHARSET_FROM_ID (preferred_charset_id);
3804               if (! CHAR_CHARSET_P (c, charset))
3805                 charset = char_charset (c, charset_list, NULL);
3806             }
3807           else
3808             charset = char_charset (c, charset_list, NULL);
3809           if (!charset)
3810             {
3811               if (coding->mode & CODING_MODE_SAFE_ENCODING)
3812                 {
3813                   c = CODING_INHIBIT_CHARACTER_SUBSTITUTION;
3814                   charset = CHARSET_FROM_ID (charset_ascii);
3815                 }
3816               else
3817                 {
3818                   c = coding->default_char;
3819                   charset = char_charset (c, charset_list, NULL);
3820                 }
3821             }
3822           ENCODE_ISO_CHARACTER (charset, c);
3823         }
3824     }
3825
3826   if (coding->mode & CODING_MODE_LAST_BLOCK
3827       && CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_RESET_AT_EOL)
3828     {
3829       ASSURE_DESTINATION (safe_room);
3830       ENCODE_RESET_PLANE_AND_REGISTER ();
3831     }
3832   record_conversion_result (coding, CODING_RESULT_SUCCESS);
3833   CODING_ISO_BOL (coding) = bol_designation;
3834   coding->produced_char += produced_chars;
3835   coding->produced = dst - coding->destination;
3836   return 0;
3837 }
3838
3839 \f
3840 /*** 8,9. SJIS and BIG5 handlers ***/
3841
3842 /* Although SJIS and BIG5 are not ISO's coding system, they are used
3843    quite widely.  So, for the moment, Emacs supports them in the bare
3844    C code.  But, in the future, they may be supported only by CCL.  */
3845
3846 /* SJIS is a coding system encoding three character sets: ASCII, right
3847    half of JISX0201-Kana, and JISX0208.  An ASCII character is encoded
3848    as is.  A character of charset katakana-jisx0201 is encoded by
3849    "position-code + 0x80".  A character of charset japanese-jisx0208
3850    is encoded in 2-byte but two position-codes are divided and shifted
3851    so that it fit in the range below.
3852
3853    --- CODE RANGE of SJIS ---
3854    (character set)      (range)
3855    ASCII                0x00 .. 0x7F
3856    KATAKANA-JISX0201    0xA0 .. 0xDF
3857    JISX0208 (1st byte)  0x81 .. 0x9F and 0xE0 .. 0xEF
3858             (2nd byte)  0x40 .. 0x7E and 0x80 .. 0xFC
3859    -------------------------------
3860
3861 */
3862
3863 /* BIG5 is a coding system encoding two character sets: ASCII and
3864    Big5.  An ASCII character is encoded as is.  Big5 is a two-byte
3865    character set and is encoded in two-byte.
3866
3867    --- CODE RANGE of BIG5 ---
3868    (character set)      (range)
3869    ASCII                0x00 .. 0x7F
3870    Big5 (1st byte)      0xA1 .. 0xFE
3871         (2nd byte)      0x40 .. 0x7E and 0xA1 .. 0xFE
3872    --------------------------
3873
3874   */
3875
3876 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
3877    Check if a text is encoded in SJIS.  If it is, return
3878    CATEGORY_MASK_SJIS, else return 0.  */
3879
3880 static int
3881 detect_coding_sjis (coding, detect_info)
3882      struct coding_system *coding;
3883      struct coding_detection_info *detect_info;
3884 {
3885   const unsigned char *src = coding->source, *src_base;
3886   const unsigned char *src_end = coding->source + coding->src_bytes;
3887   int multibytep = coding->src_multibyte;
3888   int consumed_chars = 0;
3889   int found = 0;
3890   int c;
3891
3892   detect_info->checked |= CATEGORY_MASK_SJIS;
3893   /* A coding system of this category is always ASCII compatible.  */
3894   src += coding->head_ascii;
3895
3896   while (1)
3897     {
3898       src_base = src;
3899       ONE_MORE_BYTE (c);
3900       if (c < 0x80)
3901         continue;
3902       if ((c >= 0x81 && c <= 0x9F) || (c >= 0xE0 && c <= 0xEF))
3903         {
3904           ONE_MORE_BYTE (c);
3905           if (c < 0x40 || c == 0x7F || c > 0xFC)
3906             break;
3907           found = CATEGORY_MASK_SJIS;
3908         }
3909       else if (c >= 0xA0 && c < 0xE0)
3910         found = CATEGORY_MASK_SJIS;
3911       else
3912         break;
3913     }
3914   detect_info->rejected |= CATEGORY_MASK_SJIS;
3915   return 0;
3916
3917  no_more_source:
3918   if (src_base < src && coding->mode & CODING_MODE_LAST_BLOCK)
3919     {
3920       detect_info->rejected |= CATEGORY_MASK_SJIS;
3921       return 0;
3922     }
3923   detect_info->found |= found;
3924   return 1;
3925 }
3926
3927 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
3928    Check if a text is encoded in BIG5.  If it is, return
3929    CATEGORY_MASK_BIG5, else return 0.  */
3930
3931 static int
3932 detect_coding_big5 (coding, detect_info)
3933      struct coding_system *coding;
3934      struct coding_detection_info *detect_info;
3935 {
3936   const unsigned char *src = coding->source, *src_base;
3937   const unsigned char *src_end = coding->source + coding->src_bytes;
3938   int multibytep = coding->src_multibyte;
3939   int consumed_chars = 0;
3940   int found = 0;
3941   int c;
3942
3943   detect_info->checked |= CATEGORY_MASK_BIG5;
3944   /* A coding system of this category is always ASCII compatible.  */
3945   src += coding->head_ascii;
3946
3947   while (1)
3948     {
3949       src_base = src;
3950       ONE_MORE_BYTE (c);
3951       if (c < 0x80)
3952         continue;
3953       if (c >= 0xA1)
3954         {
3955           ONE_MORE_BYTE (c);
3956           if (c < 0x40 || (c >= 0x7F && c <= 0xA0))
3957             return 0;
3958           found = CATEGORY_MASK_BIG5;
3959         }
3960       else
3961         break;
3962     }
3963   detect_info->rejected |= CATEGORY_MASK_BIG5;
3964   return 0;
3965
3966  no_more_source:
3967   if (src_base < src && coding->mode & CODING_MODE_LAST_BLOCK)
3968     {
3969       detect_info->rejected |= CATEGORY_MASK_BIG5;
3970       return 0;
3971     }
3972   detect_info->found |= found;
3973   return 1;
3974 }
3975
3976 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".
3977    If SJIS_P is 1, decode SJIS text, else decode BIG5 test.  */
3978
3979 static void
3980 decode_coding_sjis (coding)
3981      struct coding_system *coding;
3982 {
3983   const unsigned char *src = coding->source + coding->consumed;
3984   const unsigned char *src_end = coding->source + coding->src_bytes;
3985   const unsigned char *src_base;
3986   int *charbuf = coding->charbuf + coding->charbuf_used;
3987   int *charbuf_end
3988     = coding->charbuf + coding->charbuf_size - MAX_ANNOTATION_LENGTH;
3989   int consumed_chars = 0, consumed_chars_base;
3990   int multibytep = coding->src_multibyte;
3991   struct charset *charset_roman, *charset_kanji, *charset_kana;
3992   struct charset *charset_kanji2;
3993   Lisp_Object attrs, charset_list, val;
3994   int char_offset = coding->produced_char;
3995   int last_offset = char_offset;
3996   int last_id = charset_ascii;
3997
3998   CODING_GET_INFO (coding, attrs, charset_list);
3999
4000   val = charset_list;
4001   charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
4002   charset_kana = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
4003   charset_kanji = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
4004   charset_kanji2 = NILP (val) ? NULL : CHARSET_FROM_ID (XINT (XCAR (val)));
4005
4006   while (1)
4007     {
4008       int c, c1;
4009       struct charset *charset;
4010
4011       src_base = src;
4012       consumed_chars_base = consumed_chars;
4013
4014       if (charbuf >= charbuf_end)
4015         break;
4016
4017       ONE_MORE_BYTE (c);
4018       if (c < 0)
4019         goto invalid_code;
4020       if (c < 0x80)
4021         charset = charset_roman;
4022       else if (c == 0x80 || c == 0xA0)
4023         goto invalid_code;
4024       else if (c >= 0xA1 && c <= 0xDF)
4025         {
4026           /* SJIS -> JISX0201-Kana */
4027           c &= 0x7F;
4028           charset = charset_kana;
4029         }
4030       else if (c <= 0xEF)
4031         {
4032           /* SJIS -> JISX0208 */
4033           ONE_MORE_BYTE (c1);
4034           if (c1 < 0x40 || c1 == 0x7F || c1 > 0xFC)
4035             goto invalid_code;
4036           c = (c << 8) | c1;
4037           SJIS_TO_JIS (c);
4038           charset = charset_kanji;
4039         }
4040       else if (c <= 0xFC && charset_kanji2)
4041         {
4042           /* SJIS -> JISX0213-2 */
4043           ONE_MORE_BYTE (c1);
4044           if (c1 < 0x40 || c1 == 0x7F || c1 > 0xFC)
4045             goto invalid_code;
4046           c = (c << 8) | c1;
4047           SJIS_TO_JIS2 (c);
4048           charset = charset_kanji2;
4049         }
4050       else
4051         goto invalid_code;
4052       if (charset->id != charset_ascii
4053           && last_id != charset->id)
4054         {
4055           if (last_id != charset_ascii)
4056             ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
4057           last_id = charset->id;
4058           last_offset = char_offset;
4059         }
4060       CODING_DECODE_CHAR (coding, src, src_base, src_end, charset, c, c);
4061       *charbuf++ = c;
4062       char_offset++;
4063       continue;
4064
4065     invalid_code:
4066       src = src_base;
4067       consumed_chars = consumed_chars_base;
4068       ONE_MORE_BYTE (c);
4069       *charbuf++ = c < 0 ? -c : BYTE8_TO_CHAR (c);
4070       char_offset++;
4071       coding->errors++;
4072     }
4073
4074  no_more_source:
4075   if (last_id != charset_ascii)
4076     ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
4077   coding->consumed_char += consumed_chars_base;
4078   coding->consumed = src_base - coding->source;
4079   coding->charbuf_used = charbuf - coding->charbuf;
4080 }
4081
4082 static void
4083 decode_coding_big5 (coding)
4084      struct coding_system *coding;
4085 {
4086   const unsigned char *src = coding->source + coding->consumed;
4087   const unsigned char *src_end = coding->source + coding->src_bytes;
4088   const unsigned char *src_base;
4089   int *charbuf = coding->charbuf + coding->charbuf_used;
4090   int *charbuf_end
4091     = coding->charbuf + coding->charbuf_size - MAX_ANNOTATION_LENGTH;
4092   int consumed_chars = 0, consumed_chars_base;
4093   int multibytep = coding->src_multibyte;
4094   struct charset *charset_roman, *charset_big5;
4095   Lisp_Object attrs, charset_list, val;
4096   int char_offset = coding->produced_char;
4097   int last_offset = char_offset;
4098   int last_id = charset_ascii;
4099
4100   CODING_GET_INFO (coding, attrs, charset_list);
4101   val = charset_list;
4102   charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
4103   charset_big5 = CHARSET_FROM_ID (XINT (XCAR (val)));
4104
4105   while (1)
4106     {
4107       int c, c1;
4108       struct charset *charset;
4109
4110       src_base = src;
4111       consumed_chars_base = consumed_chars;
4112
4113       if (charbuf >= charbuf_end)
4114         break;
4115
4116       ONE_MORE_BYTE (c);
4117
4118       if (c < 0)
4119         goto invalid_code;
4120       if (c < 0x80)
4121         charset = charset_roman;
4122       else
4123         {
4124           /* BIG5 -> Big5 */
4125           if (c < 0xA1 || c > 0xFE)
4126             goto invalid_code;
4127           ONE_MORE_BYTE (c1);
4128           if (c1 < 0x40 || (c1 > 0x7E && c1 < 0xA1) || c1 > 0xFE)
4129             goto invalid_code;
4130           c = c << 8 | c1;
4131           charset = charset_big5;
4132         }
4133       if (charset->id != charset_ascii
4134           && last_id != charset->id)
4135         {
4136           if (last_id != charset_ascii)
4137             ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
4138           last_id = charset->id;
4139           last_offset = char_offset;
4140         }
4141       CODING_DECODE_CHAR (coding, src, src_base, src_end, charset, c, c);
4142       *charbuf++ = c;
4143       char_offset++;
4144       continue;
4145
4146     invalid_code:
4147       src = src_base;
4148       consumed_chars = consumed_chars_base;
4149       ONE_MORE_BYTE (c);
4150       *charbuf++ = c < 0 ? -c : BYTE8_TO_CHAR (c);
4151       char_offset++;
4152       coding->errors++;
4153     }
4154
4155  no_more_source:
4156   if (last_id != charset_ascii)
4157     ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
4158   coding->consumed_char += consumed_chars_base;
4159   coding->consumed = src_base - coding->source;
4160   coding->charbuf_used = charbuf - coding->charbuf;
4161 }
4162
4163 /* See the above "GENERAL NOTES on `encode_coding_XXX ()' functions".
4164    This function can encode charsets `ascii', `katakana-jisx0201',
4165    `japanese-jisx0208', `chinese-big5-1', and `chinese-big5-2'.  We
4166    are sure that all these charsets are registered as official charset
4167    (i.e. do not have extended leading-codes).  Characters of other
4168    charsets are produced without any encoding.  If SJIS_P is 1, encode
4169    SJIS text, else encode BIG5 text.  */
4170
4171 static int
4172 encode_coding_sjis (coding)
4173      struct coding_system *coding;
4174 {
4175   int multibytep = coding->dst_multibyte;
4176   int *charbuf = coding->charbuf;
4177   int *charbuf_end = charbuf + coding->charbuf_used;
4178   unsigned char *dst = coding->destination + coding->produced;
4179   unsigned char *dst_end = coding->destination + coding->dst_bytes;
4180   int safe_room = 4;
4181   int produced_chars = 0;
4182   Lisp_Object attrs, charset_list, val;
4183   int ascii_compatible;
4184   struct charset *charset_roman, *charset_kanji, *charset_kana;
4185   struct charset *charset_kanji2;
4186   int c;
4187
4188   CODING_GET_INFO (coding, attrs, charset_list);
4189   val = charset_list;
4190   charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
4191   charset_kana = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
4192   charset_kanji = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
4193   charset_kanji2 = NILP (val) ? NULL : CHARSET_FROM_ID (XINT (XCAR (val)));
4194
4195   ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
4196
4197   while (charbuf < charbuf_end)
4198     {
4199       ASSURE_DESTINATION (safe_room);
4200       c = *charbuf++;
4201       /* Now encode the character C.  */
4202       if (ASCII_CHAR_P (c) && ascii_compatible)
4203         EMIT_ONE_ASCII_BYTE (c);
4204       else if (CHAR_BYTE8_P (c))
4205         {
4206           c = CHAR_TO_BYTE8 (c);
4207           EMIT_ONE_BYTE (c);
4208         }
4209       else
4210         {
4211           unsigned code;
4212           struct charset *charset = char_charset (c, charset_list, &code);
4213
4214           if (!charset)
4215             {
4216               if (coding->mode & CODING_MODE_SAFE_ENCODING)
4217                 {
4218                   code = CODING_INHIBIT_CHARACTER_SUBSTITUTION;
4219                   charset = CHARSET_FROM_ID (charset_ascii);
4220                 }
4221               else
4222                 {
4223                   c = coding->default_char;
4224                   charset = char_charset (c, charset_list, &code);
4225                 }
4226             }
4227           if (code == CHARSET_INVALID_CODE (charset))
4228             abort ();
4229           if (charset == charset_kanji)
4230             {
4231               int c1, c2;
4232               JIS_TO_SJIS (code);
4233               c1 = code >> 8, c2 = code & 0xFF;
4234               EMIT_TWO_BYTES (c1, c2);
4235             }
4236           else if (charset == charset_kana)
4237             EMIT_ONE_BYTE (code | 0x80);
4238           else if (charset_kanji2 && charset == charset_kanji2)
4239             {
4240               int c1, c2;
4241
4242               c1 = code >> 8;
4243               if (c1 == 0x21 || (c1 >= 0x23 && c1 < 0x25)
4244                   || (c1 >= 0x2C && c1 <= 0x2F) || c1 >= 0x6E)
4245                 {
4246                   JIS_TO_SJIS2 (code);
4247                   c1 = code >> 8, c2 = code & 0xFF;
4248                   EMIT_TWO_BYTES (c1, c2);
4249                 }
4250               else
4251                 EMIT_ONE_ASCII_BYTE (code & 0x7F);
4252             }
4253           else
4254             EMIT_ONE_ASCII_BYTE (code & 0x7F);
4255         }
4256     }
4257   record_conversion_result (coding, CODING_RESULT_SUCCESS);
4258   coding->produced_char += produced_chars;
4259   coding->produced = dst - coding->destination;
4260   return 0;
4261 }
4262
4263 static int
4264 encode_coding_big5 (coding)
4265      struct coding_system *coding;
4266 {
4267   int multibytep = coding->dst_multibyte;
4268   int *charbuf = coding->charbuf;
4269   int *charbuf_end = charbuf + coding->charbuf_used;
4270   unsigned char *dst = coding->destination + coding->produced;
4271   unsigned char *dst_end = coding->destination + coding->dst_bytes;
4272   int safe_room = 4;
4273   int produced_chars = 0;
4274   Lisp_Object attrs, charset_list, val;
4275   int ascii_compatible;
4276   struct charset *charset_roman, *charset_big5;
4277   int c;
4278
4279   CODING_GET_INFO (coding, attrs, charset_list);
4280   val = charset_list;
4281   charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
4282   charset_big5 = CHARSET_FROM_ID (XINT (XCAR (val)));
4283   ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
4284
4285   while (charbuf < charbuf_end)
4286     {
4287       ASSURE_DESTINATION (safe_room);
4288       c = *charbuf++;
4289       /* Now encode the character C.  */
4290       if (ASCII_CHAR_P (c) && ascii_compatible)
4291         EMIT_ONE_ASCII_BYTE (c);
4292       else if (CHAR_BYTE8_P (c))
4293         {
4294           c = CHAR_TO_BYTE8 (c);
4295           EMIT_ONE_BYTE (c);
4296         }
4297       else
4298         {
4299           unsigned code;
4300           struct charset *charset = char_charset (c, charset_list, &code);
4301
4302           if (! charset)
4303             {
4304               if (coding->mode & CODING_MODE_SAFE_ENCODING)
4305                 {
4306                   code = CODING_INHIBIT_CHARACTER_SUBSTITUTION;
4307                   charset = CHARSET_FROM_ID (charset_ascii);
4308                 }
4309               else
4310                 {
4311                   c = coding->default_char;
4312                   charset = char_charset (c, charset_list, &code);
4313                 }
4314             }
4315           if (code == CHARSET_INVALID_CODE (charset))
4316             abort ();
4317           if (charset == charset_big5)
4318             {
4319               int c1, c2;
4320
4321               c1 = code >> 8, c2 = code & 0xFF;
4322               EMIT_TWO_BYTES (c1, c2);
4323             }
4324           else
4325             EMIT_ONE_ASCII_BYTE (code & 0x7F);
4326         }
4327     }
4328   record_conversion_result (coding, CODING_RESULT_SUCCESS);
4329   coding->produced_char += produced_chars;
4330   coding->produced = dst - coding->destination;
4331   return 0;
4332 }
4333
4334 \f
4335 /*** 10. CCL handlers ***/
4336
4337 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
4338    Check if a text is encoded in a coding system of which
4339    encoder/decoder are written in CCL program.  If it is, return
4340    CATEGORY_MASK_CCL, else return 0.  */
4341
4342 static int
4343 detect_coding_ccl (coding, detect_info)
4344      struct coding_system *coding;
4345      struct coding_detection_info *detect_info;
4346 {
4347   const unsigned char *src = coding->source, *src_base;
4348   const unsigned char *src_end = coding->source + coding->src_bytes;
4349   int multibytep = coding->src_multibyte;
4350   int consumed_chars = 0;
4351   int found = 0;
4352   unsigned char *valids;
4353   int head_ascii = coding->head_ascii;
4354   Lisp_Object attrs;
4355
4356   detect_info->checked |= CATEGORY_MASK_CCL;
4357
4358   coding = &coding_categories[coding_category_ccl];
4359   valids = CODING_CCL_VALIDS (coding);
4360   attrs = CODING_ID_ATTRS (coding->id);
4361   if (! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
4362     src += head_ascii;
4363
4364   while (1)
4365     {
4366       int c;
4367
4368       src_base = src;
4369       ONE_MORE_BYTE (c);
4370       if (c < 0 || ! valids[c])
4371         break;
4372       if ((valids[c] > 1))
4373         found = CATEGORY_MASK_CCL;
4374     }
4375   detect_info->rejected |= CATEGORY_MASK_CCL;
4376   return 0;
4377
4378  no_more_source:
4379   detect_info->found |= found;
4380   return 1;
4381 }
4382
4383 static void
4384 decode_coding_ccl (coding)
4385      struct coding_system *coding;
4386 {
4387   const unsigned char *src = coding->source + coding->consumed;
4388   const unsigned char *src_end = coding->source + coding->src_bytes;
4389   int *charbuf = coding->charbuf + coding->charbuf_used;
4390   int *charbuf_end = coding->charbuf + coding->charbuf_size;
4391   int consumed_chars = 0;
4392   int multibytep = coding->src_multibyte;
4393   struct ccl_program ccl;
4394   int source_charbuf[1024];
4395   int source_byteidx[1024];
4396   Lisp_Object attrs, charset_list;
4397
4398   CODING_GET_INFO (coding, attrs, charset_list);
4399   setup_ccl_program (&ccl, CODING_CCL_DECODER (coding));
4400
4401   while (src < src_end)
4402     {
4403       const unsigned char *p = src;
4404       int *source, *source_end;
4405       int i = 0;
4406
4407       if (multibytep)
4408         while (i < 1024 && p < src_end)
4409           {
4410             source_byteidx[i] = p - src;
4411             source_charbuf[i++] = STRING_CHAR_ADVANCE (p);
4412           }
4413       else
4414         while (i < 1024 && p < src_end)
4415           source_charbuf[i++] = *p++;
4416
4417       if (p == src_end && coding->mode & CODING_MODE_LAST_BLOCK)
4418         ccl.last_block = 1;
4419
4420       source = source_charbuf;
4421       source_end = source + i;
4422       while (source < source_end)
4423         {
4424           ccl_driver (&ccl, source, charbuf,
4425                       source_end - source, charbuf_end - charbuf,
4426                       charset_list);
4427           source += ccl.consumed;
4428           charbuf += ccl.produced;
4429           if (ccl.status != CCL_STAT_SUSPEND_BY_DST)
4430             break;
4431         }
4432       if (source < source_end)
4433         src += source_byteidx[source - source_charbuf];
4434       else
4435         src = p;
4436       consumed_chars += source - source_charbuf;
4437
4438       if (ccl.status != CCL_STAT_SUSPEND_BY_SRC
4439           && ccl.status != CODING_RESULT_INSUFFICIENT_SRC)
4440         break;
4441     }
4442
4443   switch (ccl.status)
4444     {
4445     case CCL_STAT_SUSPEND_BY_SRC:
4446       record_conversion_result (coding, CODING_RESULT_INSUFFICIENT_SRC);
4447       break;
4448     case CCL_STAT_SUSPEND_BY_DST:
4449       break;
4450     case CCL_STAT_QUIT:
4451     case CCL_STAT_INVALID_CMD:
4452       record_conversion_result (coding, CODING_RESULT_INTERRUPT);
4453       break;
4454     default:
4455       record_conversion_result (coding, CODING_RESULT_SUCCESS);
4456       break;
4457     }
4458   coding->consumed_char += consumed_chars;
4459   coding->consumed = src - coding->source;
4460   coding->charbuf_used = charbuf - coding->charbuf;
4461 }
4462
4463 static int
4464 encode_coding_ccl (coding)
4465      struct coding_system *coding;
4466 {
4467   struct ccl_program ccl;
4468   int multibytep = coding->dst_multibyte;
4469   int *charbuf = coding->charbuf;
4470   int *charbuf_end = charbuf + coding->charbuf_used;
4471   unsigned char *dst = coding->destination + coding->produced;
4472   unsigned char *dst_end = coding->destination + coding->dst_bytes;
4473   unsigned char *adjusted_dst_end = dst_end - 1;
4474   int destination_charbuf[1024];
4475   int i, produced_chars = 0;
4476   Lisp_Object attrs, charset_list;
4477
4478   CODING_GET_INFO (coding, attrs, charset_list);
4479   setup_ccl_program (&ccl, CODING_CCL_ENCODER (coding));
4480
4481   ccl.last_block = coding->mode & CODING_MODE_LAST_BLOCK;
4482   ccl.dst_multibyte = coding->dst_multibyte;
4483
4484   while (charbuf < charbuf_end && dst < adjusted_dst_end)
4485     {
4486       int dst_bytes = dst_end - dst;
4487       if (dst_bytes > 1024)
4488         dst_bytes = 1024;
4489
4490       ccl_driver (&ccl, charbuf, destination_charbuf,
4491                   charbuf_end - charbuf, dst_bytes, charset_list);
4492       charbuf += ccl.consumed;
4493       if (multibytep)
4494         for (i = 0; i < ccl.produced; i++)
4495           EMIT_ONE_BYTE (destination_charbuf[i] & 0xFF);
4496       else
4497         {
4498           for (i = 0; i < ccl.produced; i++)
4499             *dst++ = destination_charbuf[i] & 0xFF;
4500           produced_chars += ccl.produced;
4501         }
4502     }
4503
4504   switch (ccl.status)
4505     {
4506     case CCL_STAT_SUSPEND_BY_SRC:
4507       record_conversion_result (coding, CODING_RESULT_INSUFFICIENT_SRC);
4508       break;
4509     case CCL_STAT_SUSPEND_BY_DST:
4510       record_conversion_result (coding, CODING_RESULT_INSUFFICIENT_DST);
4511       break;
4512     case CCL_STAT_QUIT:
4513     case CCL_STAT_INVALID_CMD:
4514       record_conversion_result (coding, CODING_RESULT_INTERRUPT);
4515       break;
4516     default:
4517       record_conversion_result (coding, CODING_RESULT_SUCCESS);
4518       break;
4519     }
4520
4521   coding->produced_char += produced_chars;
4522   coding->produced = dst - coding->destination;
4523   return 0;
4524 }
4525
4526
4527 \f
4528 /*** 10, 11. no-conversion handlers ***/
4529
4530 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".  */
4531
4532 static void
4533 decode_coding_raw_text (coding)
4534      struct coding_system *coding;
4535 {
4536   coding->chars_at_source = 1;
4537   coding->consumed_char = 0;
4538   coding->consumed = 0;
4539   record_conversion_result (coding, CODING_RESULT_SUCCESS);
4540 }
4541
4542 static int
4543 encode_coding_raw_text (coding)
4544      struct coding_system *coding;
4545 {
4546   int multibytep = coding->dst_multibyte;
4547   int *charbuf = coding->charbuf;
4548   int *charbuf_end = coding->charbuf + coding->charbuf_used;
4549   unsigned char *dst = coding->destination + coding->produced;
4550   unsigned char *dst_end = coding->destination + coding->dst_bytes;
4551   int produced_chars = 0;
4552   int c;
4553
4554   if (multibytep)
4555     {
4556       int safe_room = MAX_MULTIBYTE_LENGTH * 2;
4557
4558       if (coding->src_multibyte)
4559         while (charbuf < charbuf_end)
4560           {
4561             ASSURE_DESTINATION (safe_room);
4562             c = *charbuf++;
4563             if (ASCII_CHAR_P (c))
4564               EMIT_ONE_ASCII_BYTE (c);
4565             else if (CHAR_BYTE8_P (c))
4566               {
4567                 c = CHAR_TO_BYTE8 (c);
4568                 EMIT_ONE_BYTE (c);
4569               }
4570             else
4571               {
4572                 unsigned char str[MAX_MULTIBYTE_LENGTH], *p0 = str, *p1 = str;
4573
4574                 CHAR_STRING_ADVANCE (c, p1);
4575                 while (p0 < p1)
4576                   {
4577                     EMIT_ONE_BYTE (*p0);
4578                     p0++;
4579                   }
4580               }
4581           }
4582       else
4583         while (charbuf < charbuf_end)
4584           {
4585             ASSURE_DESTINATION (safe_room);
4586             c = *charbuf++;
4587             EMIT_ONE_BYTE (c);
4588           }
4589     }
4590   else
4591     {
4592       if (coding->src_multibyte)
4593         {
4594           int safe_room = MAX_MULTIBYTE_LENGTH;
4595
4596           while (charbuf < charbuf_end)
4597             {
4598               ASSURE_DESTINATION (safe_room);
4599               c = *charbuf++;
4600               if (ASCII_CHAR_P (c))
4601                 *dst++ = c;
4602               else if (CHAR_BYTE8_P (c))
4603                 *dst++ = CHAR_TO_BYTE8 (c);
4604               else
4605                 CHAR_STRING_ADVANCE (c, dst);
4606               produced_chars++;
4607             }
4608         }
4609       else
4610         {
4611           ASSURE_DESTINATION (charbuf_end - charbuf);
4612           while (charbuf < charbuf_end && dst < dst_end)
4613             *dst++ = *charbuf++;
4614           produced_chars = dst - (coding->destination + coding->dst_bytes);
4615         }
4616     }
4617   record_conversion_result (coding, CODING_RESULT_SUCCESS);
4618   coding->produced_char += produced_chars;
4619   coding->produced = dst - coding->destination;
4620   return 0;
4621 }
4622
4623 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
4624    Check if a text is encoded in a charset-based coding system.  If it
4625    is, return 1, else return 0.  */
4626
4627 static int
4628 detect_coding_charset (coding, detect_info)
4629      struct coding_system *coding;
4630      struct coding_detection_info *detect_info;
4631 {
4632   const unsigned char *src = coding->source, *src_base;
4633   const unsigned char *src_end = coding->source + coding->src_bytes;
4634   int multibytep = coding->src_multibyte;
4635   int consumed_chars = 0;
4636   Lisp_Object attrs, valids;
4637   int found = 0;
4638
4639   detect_info->checked |= CATEGORY_MASK_CHARSET;
4640
4641   coding = &coding_categories[coding_category_charset];
4642   attrs = CODING_ID_ATTRS (coding->id);
4643   valids = AREF (attrs, coding_attr_charset_valids);
4644
4645   if (! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
4646     src += coding->head_ascii;
4647
4648   while (1)
4649     {
4650       int c;
4651
4652       src_base = src;
4653       ONE_MORE_BYTE (c);
4654       if (c < 0)
4655         continue;
4656       if (NILP (AREF (valids, c)))
4657         break;
4658       if (c >= 0x80)
4659         found = CATEGORY_MASK_CHARSET;
4660     }
4661   detect_info->rejected |= CATEGORY_MASK_CHARSET;
4662   return 0;
4663
4664  no_more_source:
4665   detect_info->found |= found;
4666   return 1;
4667 }
4668
4669 static void
4670 decode_coding_charset (coding)
4671      struct coding_system *coding;
4672 {
4673   const unsigned char *src = coding->source + coding->consumed;
4674   const unsigned char *src_end = coding->source + coding->src_bytes;
4675   const unsigned char *src_base;
4676   int *charbuf = coding->charbuf + coding->charbuf_used;
4677   int *charbuf_end
4678     = coding->charbuf + coding->charbuf_size - MAX_ANNOTATION_LENGTH;
4679   int consumed_chars = 0, consumed_chars_base;
4680   int multibytep = coding->src_multibyte;
4681   Lisp_Object attrs, charset_list, valids;
4682   int char_offset = coding->produced_char;
4683   int last_offset = char_offset;
4684   int last_id = charset_ascii;
4685
4686   CODING_GET_INFO (coding, attrs, charset_list);
4687   valids = AREF (attrs, coding_attr_charset_valids);
4688
4689   while (1)
4690     {
4691       int c;
4692       Lisp_Object val;
4693       struct charset *charset;
4694       int dim;
4695       int len = 1;
4696       unsigned code;
4697
4698       src_base = src;
4699       consumed_chars_base = consumed_chars;
4700
4701       if (charbuf >= charbuf_end)
4702         break;
4703
4704       ONE_MORE_BYTE (c);
4705       if (c < 0)
4706         goto invalid_code;
4707       code = c;
4708
4709       val = AREF (valids, c);
4710       if (NILP (val))
4711         goto invalid_code;
4712       if (INTEGERP (val))
4713         {
4714           charset = CHARSET_FROM_ID (XFASTINT (val));
4715           dim = CHARSET_DIMENSION (charset);
4716           while (len < dim)
4717             {
4718               ONE_MORE_BYTE (c);
4719               code = (code << 8) | c;
4720               len++;
4721             }
4722           CODING_DECODE_CHAR (coding, src, src_base, src_end,
4723                               charset, code, c);
4724         }
4725       else
4726         {
4727           /* VAL is a list of charset IDs.  It is assured that the
4728              list is sorted by charset dimensions (smaller one
4729              comes first).  */
4730           while (CONSP (val))
4731             {
4732               charset = CHARSET_FROM_ID (XFASTINT (XCAR (val)));
4733               dim = CHARSET_DIMENSION (charset);
4734               while (len < dim)
4735                 {
4736                   ONE_MORE_BYTE (c);
4737                   code = (code << 8) | c;
4738                   len++;
4739                 }
4740               CODING_DECODE_CHAR (coding, src, src_base,
4741                                   src_end, charset, code, c);
4742               if (c >= 0)
4743                 break;
4744               val = XCDR (val);
4745             }
4746         }
4747       if (c < 0)
4748         goto invalid_code;
4749       if (charset->id != charset_ascii
4750           && last_id != charset->id)
4751         {
4752           if (last_id != charset_ascii)
4753             ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
4754           last_id = charset->id;
4755           last_offset = char_offset;
4756         }
4757
4758       *charbuf++ = c;
4759       char_offset++;
4760       continue;
4761
4762     invalid_code:
4763       src = src_base;
4764       consumed_chars = consumed_chars_base;
4765       ONE_MORE_BYTE (c);
4766       *charbuf++ = c < 0 ? -c : ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
4767       char_offset++;
4768       coding->errors++;
4769     }
4770
4771  no_more_source:
4772   if (last_id != charset_ascii)
4773     ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
4774   coding->consumed_char += consumed_chars_base;
4775   coding->consumed = src_base - coding->source;
4776   coding->charbuf_used = charbuf - coding->charbuf;
4777 }
4778
4779 static int
4780 encode_coding_charset (coding)
4781      struct coding_system *coding;
4782 {
4783   int multibytep = coding->dst_multibyte;
4784   int *charbuf = coding->charbuf;
4785   int *charbuf_end = charbuf + coding->charbuf_used;
4786   unsigned char *dst = coding->destination + coding->produced;
4787   unsigned char *dst_end = coding->destination + coding->dst_bytes;
4788   int safe_room = MAX_MULTIBYTE_LENGTH;
4789   int produced_chars = 0;
4790   Lisp_Object attrs, charset_list;
4791   int ascii_compatible;
4792   int c;
4793
4794   CODING_GET_INFO (coding, attrs, charset_list);
4795   ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
4796
4797   while (charbuf < charbuf_end)
4798     {
4799       struct charset *charset;
4800       unsigned code;
4801
4802       ASSURE_DESTINATION (safe_room);
4803       c = *charbuf++;
4804       if (ascii_compatible && ASCII_CHAR_P (c))
4805         EMIT_ONE_ASCII_BYTE (c);
4806       else if (CHAR_BYTE8_P (c))
4807         {
4808           c = CHAR_TO_BYTE8 (c);
4809           EMIT_ONE_BYTE (c);
4810         }
4811       else
4812         {
4813           charset = char_charset (c, charset_list, &code);
4814           if (charset)
4815             {
4816               if (CHARSET_DIMENSION (charset) == 1)
4817                 EMIT_ONE_BYTE (code);
4818               else if (CHARSET_DIMENSION (charset) == 2)
4819                 EMIT_TWO_BYTES (code >> 8, code & 0xFF);
4820               else if (CHARSET_DIMENSION (charset) == 3)
4821                 EMIT_THREE_BYTES (code >> 16, (code >> 8) & 0xFF, code & 0xFF);
4822               else
4823                 EMIT_FOUR_BYTES (code >> 24, (code >> 16) & 0xFF,
4824                                  (code >> 8) & 0xFF, code & 0xFF);
4825             }
4826           else
4827             {
4828               if (coding->mode & CODING_MODE_SAFE_ENCODING)
4829                 c = CODING_INHIBIT_CHARACTER_SUBSTITUTION;
4830               else
4831                 c = coding->default_char;
4832               EMIT_ONE_BYTE (c);
4833             }
4834         }
4835     }
4836
4837   record_conversion_result (coding, CODING_RESULT_SUCCESS);
4838   coding->produced_char += produced_chars;
4839   coding->produced = dst - coding->destination;
4840   return 0;
4841 }
4842
4843 \f
4844 /*** 7. C library functions ***/
4845
4846 /* Setup coding context CODING from information about CODING_SYSTEM.
4847    If CODING_SYSTEM is nil, `no-conversion' is assumed.  If
4848    CODING_SYSTEM is invalid, signal an error.  */
4849
4850 void
4851 setup_coding_system (coding_system, coding)
4852      Lisp_Object coding_system;
4853      struct coding_system *coding;
4854 {
4855   Lisp_Object attrs;
4856   Lisp_Object eol_type;
4857   Lisp_Object coding_type;
4858   Lisp_Object val;
4859
4860   if (NILP (coding_system))
4861     coding_system = Qno_conversion;
4862
4863   CHECK_CODING_SYSTEM_GET_ID (coding_system, coding->id);
4864
4865   attrs = CODING_ID_ATTRS (coding->id);
4866   eol_type = CODING_ID_EOL_TYPE (coding->id);
4867
4868   coding->mode = 0;
4869   coding->head_ascii = -1;
4870   coding->common_flags
4871     = (VECTORP (eol_type) ? CODING_REQUIRE_DETECTION_MASK : 0);
4872   if (! NILP (CODING_ATTR_POST_READ (attrs)))
4873     coding->common_flags |= CODING_REQUIRE_DECODING_MASK;
4874   if (! NILP (CODING_ATTR_PRE_WRITE (attrs)))
4875     coding->common_flags |= CODING_REQUIRE_ENCODING_MASK;
4876   if (! NILP (CODING_ATTR_FOR_UNIBYTE (attrs)))
4877     coding->common_flags |= CODING_FOR_UNIBYTE_MASK;
4878
4879   val = CODING_ATTR_SAFE_CHARSETS (attrs);
4880   coding->max_charset_id = SCHARS (val) - 1;
4881   coding->safe_charsets = (char *) SDATA (val);
4882   coding->default_char = XINT (CODING_ATTR_DEFAULT_CHAR (attrs));
4883
4884   coding_type = CODING_ATTR_TYPE (attrs);
4885   if (EQ (coding_type, Qundecided))
4886     {
4887       coding->detector = NULL;
4888       coding->decoder = decode_coding_raw_text;
4889       coding->encoder = encode_coding_raw_text;
4890       coding->common_flags |= CODING_REQUIRE_DETECTION_MASK;
4891     }
4892   else if (EQ (coding_type, Qiso_2022))
4893     {
4894       int i;
4895       int flags = XINT (AREF (attrs, coding_attr_iso_flags));
4896
4897       /* Invoke graphic register 0 to plane 0.  */
4898       CODING_ISO_INVOCATION (coding, 0) = 0;
4899       /* Invoke graphic register 1 to plane 1 if we can use 8-bit.  */
4900       CODING_ISO_INVOCATION (coding, 1)
4901         = (flags & CODING_ISO_FLAG_SEVEN_BITS ? -1 : 1);
4902       /* Setup the initial status of designation.  */
4903       for (i = 0; i < 4; i++)
4904         CODING_ISO_DESIGNATION (coding, i) = CODING_ISO_INITIAL (coding, i);
4905       /* Not single shifting initially.  */
4906       CODING_ISO_SINGLE_SHIFTING (coding) = 0;
4907       /* Beginning of buffer should also be regarded as bol. */
4908       CODING_ISO_BOL (coding) = 1;
4909       coding->detector = detect_coding_iso_2022;
4910       coding->decoder = decode_coding_iso_2022;
4911       coding->encoder = encode_coding_iso_2022;
4912       if (flags & CODING_ISO_FLAG_SAFE)
4913         coding->mode |= CODING_MODE_SAFE_ENCODING;
4914       coding->common_flags
4915         |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK
4916             | CODING_REQUIRE_FLUSHING_MASK);
4917       if (flags & CODING_ISO_FLAG_COMPOSITION)
4918         coding->common_flags |= CODING_ANNOTATE_COMPOSITION_MASK;
4919       if (flags & CODING_ISO_FLAG_DESIGNATION)
4920         coding->common_flags |= CODING_ANNOTATE_CHARSET_MASK;
4921       if (flags & CODING_ISO_FLAG_FULL_SUPPORT)
4922         {
4923           setup_iso_safe_charsets (attrs);
4924           val = CODING_ATTR_SAFE_CHARSETS (attrs);
4925           coding->max_charset_id = SCHARS (val) - 1;
4926           coding->safe_charsets = (char *) SDATA (val);
4927         }
4928       CODING_ISO_FLAGS (coding) = flags;
4929     }
4930   else if (EQ (coding_type, Qcharset))
4931     {
4932       coding->detector = detect_coding_charset;
4933       coding->decoder = decode_coding_charset;
4934       coding->encoder = encode_coding_charset;
4935       coding->common_flags
4936         |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
4937     }
4938   else if (EQ (coding_type, Qutf_8))
4939     {
4940       coding->detector = detect_coding_utf_8;
4941       coding->decoder = decode_coding_utf_8;
4942       coding->encoder = encode_coding_utf_8;
4943       coding->common_flags
4944         |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
4945     }
4946   else if (EQ (coding_type, Qutf_16))
4947     {
4948       val = AREF (attrs, coding_attr_utf_16_bom);
4949       CODING_UTF_16_BOM (coding) = (CONSP (val) ? utf_16_detect_bom
4950                                     : EQ (val, Qt) ? utf_16_with_bom
4951                                     : utf_16_without_bom);
4952       val = AREF (attrs, coding_attr_utf_16_endian);
4953       CODING_UTF_16_ENDIAN (coding) = (EQ (val, Qbig) ? utf_16_big_endian
4954                                        : utf_16_little_endian);
4955       CODING_UTF_16_SURROGATE (coding) = 0;
4956       coding->detector = detect_coding_utf_16;
4957       coding->decoder = decode_coding_utf_16;
4958       coding->encoder = encode_coding_utf_16;
4959       coding->common_flags
4960         |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
4961       if (CODING_UTF_16_BOM (coding) == utf_16_detect_bom)
4962         coding->common_flags |= CODING_REQUIRE_DETECTION_MASK;
4963     }
4964   else if (EQ (coding_type, Qccl))
4965     {
4966       coding->detector = detect_coding_ccl;
4967       coding->decoder = decode_coding_ccl;
4968       coding->encoder = encode_coding_ccl;
4969       coding->common_flags
4970         |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK
4971             | CODING_REQUIRE_FLUSHING_MASK);
4972     }
4973   else if (EQ (coding_type, Qemacs_mule))
4974     {
4975       coding->detector = detect_coding_emacs_mule;
4976       coding->decoder = decode_coding_emacs_mule;
4977       coding->encoder = encode_coding_emacs_mule;
4978       coding->common_flags
4979         |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
4980       if (! NILP (AREF (attrs, coding_attr_emacs_mule_full))
4981           && ! EQ (CODING_ATTR_CHARSET_LIST (attrs), Vemacs_mule_charset_list))
4982         {
4983           Lisp_Object tail, safe_charsets;
4984           int max_charset_id = 0;
4985
4986           for (tail = Vemacs_mule_charset_list; CONSP (tail);
4987                tail = XCDR (tail))
4988             if (max_charset_id < XFASTINT (XCAR (tail)))
4989               max_charset_id = XFASTINT (XCAR (tail));
4990           safe_charsets = Fmake_string (make_number (max_charset_id + 1),
4991                                         make_number (255));
4992           for (tail = Vemacs_mule_charset_list; CONSP (tail);
4993                tail = XCDR (tail))
4994             SSET (safe_charsets, XFASTINT (XCAR (tail)), 0);
4995           coding->max_charset_id = max_charset_id;
4996           coding->safe_charsets = (char *) SDATA (safe_charsets);
4997         }
4998     }
4999   else if (EQ (coding_type, Qshift_jis))
5000     {
5001       coding->detector = detect_coding_sjis;
5002       coding->decoder = decode_coding_sjis;
5003       coding->encoder = encode_coding_sjis;
5004       coding->common_flags
5005         |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
5006     }
5007   else if (EQ (coding_type, Qbig5))
5008     {
5009       coding->detector = detect_coding_big5;
5010       coding->decoder = decode_coding_big5;
5011       coding->encoder = encode_coding_big5;
5012       coding->common_flags
5013         |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
5014     }
5015   else                          /* EQ (coding_type, Qraw_text) */
5016     {
5017       coding->detector = NULL;
5018       coding->decoder = decode_coding_raw_text;
5019       coding->encoder = encode_coding_raw_text;
5020     }
5021
5022   return;
5023 }
5024
5025 /* Return raw-text or one of its subsidiaries that has the same
5026    eol_type as CODING-SYSTEM.  */
5027
5028 Lisp_Object
5029 raw_text_coding_system (coding_system)
5030      Lisp_Object coding_system;
5031 {
5032   Lisp_Object spec, attrs;
5033   Lisp_Object eol_type, raw_text_eol_type;
5034
5035   if (NILP (coding_system))
5036     return Qraw_text;
5037   spec = CODING_SYSTEM_SPEC (coding_system);
5038   attrs = AREF (spec, 0);
5039
5040   if (EQ (CODING_ATTR_TYPE (attrs), Qraw_text))
5041     return coding_system;
5042
5043   eol_type = AREF (spec, 2);
5044   if (VECTORP (eol_type))
5045     return Qraw_text;
5046   spec = CODING_SYSTEM_SPEC (Qraw_text);
5047   raw_text_eol_type = AREF (spec, 2);
5048   return (EQ (eol_type, Qunix) ? AREF (raw_text_eol_type, 0)
5049           : EQ (eol_type, Qdos) ? AREF (raw_text_eol_type, 1)
5050           : AREF (raw_text_eol_type, 2));
5051 }
5052
5053
5054 /* If CODING_SYSTEM doesn't specify end-of-line format but PARENT
5055    does, return one of the subsidiary that has the same eol-spec as
5056    PARENT.  Otherwise, return CODING_SYSTEM.  */
5057
5058 Lisp_Object
5059 coding_inherit_eol_type (coding_system, parent)
5060      Lisp_Object coding_system, parent;
5061 {
5062   Lisp_Object spec, eol_type;
5063
5064   if (NILP (coding_system))
5065     coding_system = Qraw_text;
5066   spec = CODING_SYSTEM_SPEC (coding_system);
5067   eol_type = AREF (spec, 2);
5068   if (VECTORP (eol_type)
5069       && ! NILP (parent))
5070     {
5071       Lisp_Object parent_spec;
5072       Lisp_Object parent_eol_type;
5073
5074       parent_spec
5075         = CODING_SYSTEM_SPEC (buffer_defaults.buffer_file_coding_system);
5076       parent_eol_type = AREF (parent_spec, 2);
5077       if (EQ (parent_eol_type, Qunix))
5078         coding_system = AREF (eol_type, 0);
5079       else if (EQ (parent_eol_type, Qdos))
5080         coding_system = AREF (eol_type, 1);
5081       else if (EQ (parent_eol_type, Qmac))
5082         coding_system = AREF (eol_type, 2);
5083     }
5084   return coding_system;
5085 }
5086
5087 /* Emacs has a mechanism to automatically detect a coding system if it
5088    is one of Emacs' internal format, ISO2022, SJIS, and BIG5.  But,
5089    it's impossible to distinguish some coding systems accurately
5090    because they use the same range of codes.  So, at first, coding
5091    systems are categorized into 7, those are:
5092
5093    o coding-category-emacs-mule
5094
5095         The category for a coding system which has the same code range
5096         as Emacs' internal format.  Assigned the coding-system (Lisp
5097         symbol) `emacs-mule' by default.
5098
5099    o coding-category-sjis
5100
5101         The category for a coding system which has the same code range
5102         as SJIS.  Assigned the coding-system (Lisp
5103         symbol) `japanese-shift-jis' by default.
5104
5105    o coding-category-iso-7
5106
5107         The category for a coding system which has the same code range
5108         as ISO2022 of 7-bit environment.  This doesn't use any locking
5109         shift and single shift functions.  This can encode/decode all
5110         charsets.  Assigned the coding-system (Lisp symbol)
5111         `iso-2022-7bit' by default.
5112
5113    o coding-category-iso-7-tight
5114
5115         Same as coding-category-iso-7 except that this can
5116         encode/decode only the specified charsets.
5117
5118    o coding-category-iso-8-1
5119
5120         The category for a coding system which has the same code range
5121         as ISO2022 of 8-bit environment and graphic plane 1 used only
5122         for DIMENSION1 charset.  This doesn't use any locking shift
5123         and single shift functions.  Assigned the coding-system (Lisp
5124         symbol) `iso-latin-1' by default.
5125
5126    o coding-category-iso-8-2
5127
5128         The category for a coding system which has the same code range
5129         as ISO2022 of 8-bit environment and graphic plane 1 used only
5130         for DIMENSION2 charset.  This doesn't use any locking shift
5131         and single shift functions.  Assigned the coding-system (Lisp
5132         symbol) `japanese-iso-8bit' by default.
5133
5134    o coding-category-iso-7-else
5135
5136         The category for a coding system which has the same code range
5137         as ISO2022 of 7-bit environemnt but uses locking shift or
5138         single shift functions.  Assigned the coding-system (Lisp
5139         symbol) `iso-2022-7bit-lock' by default.
5140
5141    o coding-category-iso-8-else
5142
5143         The category for a coding system which has the same code range
5144         as ISO2022 of 8-bit environemnt but uses locking shift or
5145         single shift functions.  Assigned the coding-system (Lisp
5146         symbol) `iso-2022-8bit-ss2' by default.
5147
5148    o coding-category-big5
5149
5150         The category for a coding system which has the same code range
5151         as BIG5.  Assigned the coding-system (Lisp symbol)
5152         `cn-big5' by default.
5153
5154    o coding-category-utf-8
5155
5156         The category for a coding system which has the same code range
5157         as UTF-8 (cf. RFC3629).  Assigned the coding-system (Lisp
5158         symbol) `utf-8' by default.
5159
5160    o coding-category-utf-16-be
5161
5162         The category for a coding system in which a text has an
5163         Unicode signature (cf. Unicode Standard) in the order of BIG
5164         endian at the head.  Assigned the coding-system (Lisp symbol)
5165         `utf-16-be' by default.
5166
5167    o coding-category-utf-16-le
5168
5169         The category for a coding system in which a text has an
5170         Unicode signature (cf. Unicode Standard) in the order of
5171         LITTLE endian at the head.  Assigned the coding-system (Lisp
5172         symbol) `utf-16-le' by default.
5173
5174    o coding-category-ccl
5175
5176         The category for a coding system of which encoder/decoder is
5177         written in CCL programs.  The default value is nil, i.e., no
5178         coding system is assigned.
5179
5180    o coding-category-binary
5181
5182         The category for a coding system not categorized in any of the
5183         above.  Assigned the coding-system (Lisp symbol)
5184         `no-conversion' by default.
5185
5186    Each of them is a Lisp symbol and the value is an actual
5187    `coding-system's (this is also a Lisp symbol) assigned by a user.
5188    What Emacs does actually is to detect a category of coding system.
5189    Then, it uses a `coding-system' assigned to it.  If Emacs can't
5190    decide only one possible category, it selects a category of the
5191    highest priority.  Priorities of categories are also specified by a
5192    user in a Lisp variable `coding-category-list'.
5193
5194 */
5195
5196 #define EOL_SEEN_NONE   0
5197 #define EOL_SEEN_LF     1
5198 #define EOL_SEEN_CR     2
5199 #define EOL_SEEN_CRLF   4
5200
5201 /* Detect how end-of-line of a text of length SRC_BYTES pointed by
5202    SOURCE is encoded.  If CATEGORY is one of
5203    coding_category_utf_16_XXXX, assume that CR and LF are encoded by
5204    two-byte, else they are encoded by one-byte.
5205
5206    Return one of EOL_SEEN_XXX.  */
5207
5208 #define MAX_EOL_CHECK_COUNT 3
5209
5210 static int
5211 detect_eol (source, src_bytes, category)
5212      unsigned char *source;
5213      EMACS_INT src_bytes;
5214      enum coding_category category;
5215 {
5216   unsigned char *src = source, *src_end = src + src_bytes;
5217   unsigned char c;
5218   int total  = 0;
5219   int eol_seen = EOL_SEEN_NONE;
5220
5221   if ((1 << category) & CATEGORY_MASK_UTF_16)
5222     {
5223       int msb, lsb;
5224
5225       msb = category == (coding_category_utf_16_le
5226                          | coding_category_utf_16_le_nosig);
5227       lsb = 1 - msb;
5228
5229       while (src + 1 < src_end)
5230         {
5231           c = src[lsb];
5232           if (src[msb] == 0 && (c == '\n' || c == '\r'))
5233             {
5234               int this_eol;
5235
5236               if (c == '\n')
5237                 this_eol = EOL_SEEN_LF;
5238               else if (src + 3 >= src_end
5239                        || src[msb + 2] != 0
5240                        || src[lsb + 2] != '\n')
5241                 this_eol = EOL_SEEN_CR;
5242               else
5243                 this_eol = EOL_SEEN_CRLF;
5244
5245               if (eol_seen == EOL_SEEN_NONE)
5246                 /* This is the first end-of-line.  */
5247                 eol_seen = this_eol;
5248               else if (eol_seen != this_eol)
5249                 {
5250                   /* The found type is different from what found before.  */
5251                   eol_seen = EOL_SEEN_LF;
5252                   break;
5253                 }
5254               if (++total == MAX_EOL_CHECK_COUNT)
5255                 break;
5256             }
5257           src += 2;
5258         }
5259     }
5260   else
5261     {
5262       while (src < src_end)
5263         {
5264           c = *src++;
5265           if (c == '\n' || c == '\r')
5266             {
5267               int this_eol;
5268
5269               if (c == '\n')
5270                 this_eol = EOL_SEEN_LF;
5271               else if (src >= src_end || *src != '\n')
5272                 this_eol = EOL_SEEN_CR;
5273               else
5274                 this_eol = EOL_SEEN_CRLF, src++;
5275
5276               if (eol_seen == EOL_SEEN_NONE)
5277                 /* This is the first end-of-line.  */
5278                 eol_seen = this_eol;
5279               else if (eol_seen != this_eol)
5280                 {
5281                   /* The found type is different from what found before.  */
5282                   eol_seen = EOL_SEEN_LF;
5283                   break;
5284                 }
5285               if (++total == MAX_EOL_CHECK_COUNT)
5286                 break;
5287             }
5288         }
5289     }
5290   return eol_seen;
5291 }
5292
5293
5294 static Lisp_Object
5295 adjust_coding_eol_type (coding, eol_seen)
5296      struct coding_system *coding;
5297      int eol_seen;
5298 {
5299   Lisp_Object eol_type;
5300
5301   eol_type = CODING_ID_EOL_TYPE (coding->id);
5302   if (eol_seen & EOL_SEEN_LF)
5303     {
5304       coding->id = CODING_SYSTEM_ID (AREF (eol_type, 0));
5305       eol_type = Qunix;
5306     }
5307   else if (eol_seen & EOL_SEEN_CRLF)
5308     {
5309       coding->id = CODING_SYSTEM_ID (AREF (eol_type, 1));
5310       eol_type = Qdos;
5311     }
5312   else if (eol_seen & EOL_SEEN_CR)
5313     {
5314       coding->id = CODING_SYSTEM_ID (AREF (eol_type, 2));
5315       eol_type = Qmac;
5316     }
5317   return eol_type;
5318 }
5319
5320 /* Detect how a text specified in CODING is encoded.  If a coding
5321    system is detected, update fields of CODING by the detected coding
5322    system.  */
5323
5324 void
5325 detect_coding (coding)
5326      struct coding_system *coding;
5327 {
5328   const unsigned char *src, *src_end;
5329   Lisp_Object attrs, coding_type;
5330
5331   coding->consumed = coding->consumed_char = 0;
5332   coding->produced = coding->produced_char = 0;
5333   coding_set_source (coding);
5334
5335   src_end = coding->source + coding->src_bytes;
5336
5337   /* If we have not yet decided the text encoding type, detect it
5338      now.  */
5339   if (EQ (CODING_ATTR_TYPE (CODING_ID_ATTRS (coding->id)), Qundecided))
5340     {
5341       int c, i;
5342
5343       for (i = 0, src = coding->source; src < src_end; i++, src++)
5344         {
5345           c = *src;
5346           if (c & 0x80 || (c < 0x20 && (c == ISO_CODE_ESC
5347                                         || c == ISO_CODE_SI
5348                                         || c == ISO_CODE_SO)))
5349             break;
5350         }
5351       coding->head_ascii = src - (coding->source + coding->consumed);
5352
5353       if (coding->head_ascii < coding->src_bytes)
5354         {
5355           struct coding_detection_info detect_info;
5356           enum coding_category category;
5357           struct coding_system *this;
5358
5359           detect_info.checked = detect_info.found = detect_info.rejected = 0;
5360           for (i = 0; i < coding_category_raw_text; i++)
5361             {
5362               category = coding_priorities[i];
5363               this = coding_categories + category;
5364               if (this->id < 0)
5365                 {
5366                   /* No coding system of this category is defined.  */
5367                   detect_info.rejected |= (1 << category);
5368                 }
5369               else if (category >= coding_category_raw_text)
5370                 continue;
5371               else if (detect_info.checked & (1 << category))
5372                 {
5373                   if (detect_info.found & (1 << category))
5374                     break;
5375                 }
5376               else if ((*(this->detector)) (coding, &detect_info)
5377                        && detect_info.found & (1 << category))
5378                 {
5379                   if (category == coding_category_utf_16_auto)
5380                     {
5381                       if (detect_info.found & CATEGORY_MASK_UTF_16_LE)
5382                         category = coding_category_utf_16_le;
5383                       else
5384                         category = coding_category_utf_16_be;
5385                     }
5386                   break;
5387                 }
5388             }
5389           if (i < coding_category_raw_text)
5390             setup_coding_system (CODING_ID_NAME (this->id), coding);
5391           else if (detect_info.rejected == CATEGORY_MASK_ANY)
5392             setup_coding_system (Qraw_text, coding);
5393           else if (detect_info.rejected)
5394             for (i = 0; i < coding_category_raw_text; i++)
5395               if (! (detect_info.rejected & (1 << coding_priorities[i])))
5396                 {
5397                   this = coding_categories + coding_priorities[i];
5398                   setup_coding_system (CODING_ID_NAME (this->id), coding);
5399                   break;
5400                 }
5401         }
5402     }
5403   else if (XINT (CODING_ATTR_CATEGORY (CODING_ID_ATTRS (coding->id)))
5404            == coding_category_utf_16_auto)
5405     {
5406       Lisp_Object coding_systems;
5407       struct coding_detection_info detect_info;
5408
5409       coding_systems
5410         = AREF (CODING_ID_ATTRS (coding->id), coding_attr_utf_16_bom);
5411       detect_info.found = detect_info.rejected = 0;
5412       if (CONSP (coding_systems)
5413           && detect_coding_utf_16 (coding, &detect_info))
5414         {
5415           if (detect_info.found & CATEGORY_MASK_UTF_16_LE)
5416             setup_coding_system (XCAR (coding_systems), coding);
5417           else if (detect_info.found & CATEGORY_MASK_UTF_16_BE)
5418             setup_coding_system (XCDR (coding_systems), coding);
5419         }
5420     }
5421 }
5422
5423
5424 static void
5425 decode_eol (coding)
5426      struct coding_system *coding;
5427 {
5428   Lisp_Object eol_type;
5429   unsigned char *p, *pbeg, *pend;
5430
5431   eol_type = CODING_ID_EOL_TYPE (coding->id);
5432   if (EQ (eol_type, Qunix))
5433     return;
5434
5435   if (NILP (coding->dst_object))
5436     pbeg = coding->destination;
5437   else
5438     pbeg = BYTE_POS_ADDR (coding->dst_pos_byte);
5439   pend = pbeg + coding->produced;
5440
5441   if (VECTORP (eol_type))
5442     {
5443       int eol_seen = EOL_SEEN_NONE;
5444
5445       for (p = pbeg; p < pend; p++)
5446         {
5447           if (*p == '\n')
5448             eol_seen |= EOL_SEEN_LF;
5449           else if (*p == '\r')
5450             {
5451               if (p + 1 < pend && *(p + 1) == '\n')
5452                 {
5453                   eol_seen |= EOL_SEEN_CRLF;
5454                   p++;
5455                 }
5456               else
5457                 eol_seen |= EOL_SEEN_CR;
5458             }
5459         }
5460       if (eol_seen != EOL_SEEN_NONE
5461           && eol_seen != EOL_SEEN_LF
5462           && eol_seen != EOL_SEEN_CRLF
5463           && eol_seen != EOL_SEEN_CR)
5464         eol_seen = EOL_SEEN_LF;
5465       if (eol_seen != EOL_SEEN_NONE)
5466         eol_type = adjust_coding_eol_type (coding, eol_seen);
5467     }
5468
5469   if (EQ (eol_type, Qmac))
5470     {
5471       for (p = pbeg; p < pend; p++)
5472         if (*p == '\r')
5473           *p = '\n';
5474     }
5475   else if (EQ (eol_type, Qdos))
5476     {
5477       int n = 0;
5478
5479       if (NILP (coding->dst_object))
5480         {
5481           for (p = pend - 2; p >= pbeg; p--)
5482             if (*p == '\r')
5483               {
5484                 safe_bcopy ((char *) (p + 1), (char *) p, pend-- - p - 1);
5485                 n++;
5486               }
5487         }
5488       else
5489         {
5490           for (p = pend - 2; p >= pbeg; p--)
5491             if (*p == '\r')
5492               {
5493                 int pos_byte = coding->dst_pos_byte + (p - pbeg);
5494                 int pos = BYTE_TO_CHAR (pos_byte);
5495
5496                 del_range_2 (pos, pos_byte, pos + 1, pos_byte + 1, 0);
5497                 n++;
5498               }
5499         }
5500       coding->produced -= n;
5501       coding->produced_char -= n;
5502     }
5503 }
5504
5505
5506 /* Return a translation table (or list of them) from coding system
5507    attribute vector ATTRS for encoding (ENCODEP is nonzero) or
5508    decoding (ENCODEP is zero). */
5509
5510 static INLINE
5511 get_translation_table (attrs, encodep)
5512 {
5513   Lisp_Object standard, translation_table;
5514
5515   if (encodep)
5516     translation_table = CODING_ATTR_ENCODE_TBL (attrs),
5517       standard = Vstandard_translation_table_for_encode;
5518   else
5519     translation_table = CODING_ATTR_DECODE_TBL (attrs),
5520       standard = Vstandard_translation_table_for_decode;
5521   if (NILP (translation_table))
5522     return standard;
5523   if (SYMBOLP (translation_table))
5524     translation_table = Fget (translation_table, Qtranslation_table);
5525   else if (CONSP (translation_table))
5526     {
5527       Lisp_Object val;
5528
5529       translation_table = Fcopy_sequence (translation_table);
5530       for (val = translation_table; CONSP (val); val = XCDR (val))
5531         if (SYMBOLP (XCAR (val)))
5532           XSETCAR (val, Fget (XCAR (val), Qtranslation_table));
5533     }
5534   if (! NILP (standard))
5535     {
5536       if (CONSP (translation_table))
5537         translation_table = nconc2 (translation_table, Fcons (standard, Qnil));
5538       else
5539         translation_table = Fcons (translation_table, Fcons (standard, Qnil));
5540     }
5541   return translation_table;
5542 }
5543
5544
5545 static Lisp_Object
5546 get_translation (val, buf, buf_end, last_block, from_nchars, to_nchars)
5547      Lisp_Object val;
5548      int *buf, *buf_end;
5549      int last_block;
5550      int *from_nchars, *to_nchars;
5551 {
5552   /* VAL is TO or (([FROM-CHAR ...] .  TO) ...) where TO is TO-CHAR or
5553      [TO-CHAR ...].  */
5554   if (CONSP (val))
5555     {
5556       Lisp_Object from, tail;
5557       int i, len;
5558
5559       for (tail = val; CONSP (tail); tail = XCDR (tail))
5560         {
5561           val = XCAR (tail);
5562           from = XCAR (val);
5563           len = ASIZE (from);
5564           for (i = 0; i < len; i++)
5565             {
5566               if (buf + i == buf_end)
5567                 {
5568                   if (! last_block)
5569                     return Qt;
5570                   break;
5571                 }
5572               if (XINT (AREF (from, i)) != buf[i])
5573                 break;
5574             }
5575           if (i == len)
5576             {
5577               val = XCDR (val);
5578               *from_nchars = len;
5579               break;
5580             }
5581         }
5582       if (! CONSP (tail))
5583         return Qnil;
5584     }
5585   if (VECTORP (val))
5586     *buf = XINT (AREF (val, 0)), *to_nchars = ASIZE (val);
5587   else
5588     *buf = XINT (val);
5589   return val;
5590 }
5591
5592
5593 static int
5594 produce_chars (coding, translation_table, last_block)
5595      struct coding_system *coding;
5596      Lisp_Object translation_table;
5597      int last_block;
5598 {
5599   unsigned char *dst = coding->destination + coding->produced;
5600   unsigned char *dst_end = coding->destination + coding->dst_bytes;
5601   int produced;
5602   int produced_chars = 0;
5603   int carryover = 0;
5604
5605   if (! coding->chars_at_source)
5606     {
5607       /* Characters are in coding->charbuf.  */
5608       int *buf = coding->charbuf;
5609       int *buf_end = buf + coding->charbuf_used;
5610
5611       if (BUFFERP (coding->src_object)
5612           && EQ (coding->src_object, coding->dst_object))
5613         dst_end = ((unsigned char *) coding->source) + coding->consumed;
5614
5615       while (buf < buf_end)
5616         {
5617           int c = *buf, i;
5618
5619           if (c >= 0)
5620             {
5621               int from_nchars = 1, to_nchars = 1;
5622               Lisp_Object trans = Qnil;
5623
5624               if (! NILP (translation_table)
5625                   && ! NILP (trans = CHAR_TABLE_REF (translation_table, c)))
5626                 {
5627                   trans = get_translation (trans, buf, buf_end, last_block,
5628                                            &from_nchars, &to_nchars);
5629                   if (EQ (trans, Qt))
5630                     break;
5631                   c = *buf;
5632                 }
5633
5634               if (dst + MAX_MULTIBYTE_LENGTH * to_nchars > dst_end)
5635                 {
5636                   dst = alloc_destination (coding,
5637                                            buf_end - buf
5638                                            + MAX_MULTIBYTE_LENGTH * to_nchars,
5639                                            dst);
5640                   dst_end = coding->destination + coding->dst_bytes;
5641                 }
5642
5643               for (i = 0; i < to_nchars; i++)
5644                 {
5645                   if (i > 0)
5646                     c = XINT (AREF (trans, i));
5647                   if (coding->dst_multibyte
5648                       || ! CHAR_BYTE8_P (c))
5649                     CHAR_STRING_ADVANCE (c, dst);
5650                   else
5651                     *dst++ = CHAR_TO_BYTE8 (c);
5652                 }
5653               produced_chars += to_nchars;
5654               *buf++ = to_nchars;
5655               while (--from_nchars > 0)
5656                 *buf++ = 0;
5657             }
5658           else
5659             /* This is an annotation datum.  (-C) is the length.  */
5660             buf += -c;
5661         }
5662       carryover = buf_end - buf;
5663     }
5664   else
5665     {
5666       const unsigned char *src = coding->source;
5667       const unsigned char *src_end = src + coding->src_bytes;
5668       Lisp_Object eol_type;
5669
5670       eol_type = CODING_ID_EOL_TYPE (coding->id);
5671
5672       if (coding->src_multibyte != coding->dst_multibyte)
5673         {
5674           if (coding->src_multibyte)
5675             {
5676               int multibytep = 1;
5677               int consumed_chars;
5678
5679               while (1)
5680                 {
5681                   const unsigned char *src_base = src;
5682                   int c;
5683
5684                   ONE_MORE_BYTE (c);
5685                   if (c == '\r')
5686                     {
5687                       if (EQ (eol_type, Qdos))
5688                         {
5689                           if (src == src_end)
5690                             {
5691                               record_conversion_result
5692                                 (coding, CODING_RESULT_INSUFFICIENT_SRC);
5693                               goto no_more_source;
5694                             }
5695                           if (*src == '\n')
5696                             c = *src++;
5697                         }
5698                       else if (EQ (eol_type, Qmac))
5699                         c = '\n';
5700                     }
5701                   if (dst == dst_end)
5702                     {
5703                       coding->consumed = src - coding->source;
5704
5705                     if (EQ (coding->src_object, coding->dst_object))
5706                       dst_end = (unsigned char *) src;
5707                     if (dst == dst_end)
5708                       {
5709                         dst = alloc_destination (coding, src_end - src + 1,
5710                                                  dst);
5711                         dst_end = coding->destination + coding->dst_bytes;
5712                         coding_set_source (coding);
5713                         src = coding->source + coding->consumed;
5714                         src_end = coding->source + coding->src_bytes;
5715                       }
5716                     }
5717                   *dst++ = c;
5718                   produced_chars++;
5719                 }
5720             no_more_source:
5721               ;
5722             }
5723           else
5724             while (src < src_end)
5725               {
5726                 int multibytep = 1;
5727                 int c = *src++;
5728
5729                 if (c == '\r')
5730                   {
5731                     if (EQ (eol_type, Qdos))
5732                       {
5733                         if (src < src_end
5734                             && *src == '\n')
5735                           c = *src++;
5736                       }
5737                     else if (EQ (eol_type, Qmac))
5738                       c = '\n';
5739                   }
5740                 if (dst >= dst_end - 1)
5741                   {
5742                     coding->consumed = src - coding->source;
5743
5744                     if (EQ (coding->src_object, coding->dst_object))
5745                       dst_end = (unsigned char *) src;
5746                     if (dst >= dst_end - 1)
5747                       {
5748                         dst = alloc_destination (coding, src_end - src + 2,
5749                                                  dst);
5750                         dst_end = coding->destination + coding->dst_bytes;
5751                         coding_set_source (coding);
5752                         src = coding->source + coding->consumed;
5753                         src_end = coding->source + coding->src_bytes;
5754                       }
5755                   }
5756                 EMIT_ONE_BYTE (c);
5757               }
5758         }
5759       else
5760         {
5761           if (!EQ (coding->src_object, coding->dst_object))
5762             {
5763               int require = coding->src_bytes - coding->dst_bytes;
5764
5765               if (require > 0)
5766                 {
5767                   EMACS_INT offset = src - coding->source;
5768
5769                   dst = alloc_destination (coding, require, dst);
5770                   coding_set_source (coding);
5771                   src = coding->source + offset;
5772                   src_end = coding->source + coding->src_bytes;
5773                 }
5774             }
5775           produced_chars = coding->src_chars;
5776           while (src < src_end)
5777             {
5778               int c = *src++;
5779
5780               if (c == '\r')
5781                 {
5782                   if (EQ (eol_type, Qdos))
5783                     {
5784                       if (src < src_end
5785                           && *src == '\n')
5786                         c = *src++;
5787                       produced_chars--;
5788                     }
5789                   else if (EQ (eol_type, Qmac))
5790                     c = '\n';
5791                 }
5792               *dst++ = c;
5793             }
5794         }
5795       coding->consumed = coding->src_bytes;
5796       coding->consumed_char = coding->src_chars;
5797     }
5798
5799   produced = dst - (coding->destination + coding->produced);
5800   if (BUFFERP (coding->dst_object))
5801     insert_from_gap (produced_chars, produced);
5802   coding->produced += produced;
5803   coding->produced_char += produced_chars;
5804   return carryover;
5805 }
5806
5807 /* Compose text in CODING->object according to the annotation data at
5808    CHARBUF.  CHARBUF is an array:
5809      [ -LENGTH ANNOTATION_MASK FROM TO METHOD COMP_LEN [ COMPONENTS... ] ]
5810  */
5811
5812 static INLINE void
5813 produce_composition (coding, charbuf, pos)
5814      struct coding_system *coding;
5815      int *charbuf;
5816      EMACS_INT pos;
5817 {
5818   int len;
5819   EMACS_INT to;
5820   enum composition_method method;
5821   Lisp_Object components;
5822
5823   len = -charbuf[0];
5824   to = pos + charbuf[2];
5825   method = (enum composition_method) (charbuf[3]);
5826
5827   if (method == COMPOSITION_RELATIVE)
5828     components = Qnil;
5829   else
5830     {
5831       Lisp_Object args[MAX_COMPOSITION_COMPONENTS * 2 - 1];
5832       int i;
5833
5834       len -= 4;
5835       charbuf += 4;
5836       for (i = 0; i < len; i++)
5837         args[i] = make_number (charbuf[i]);
5838       components = (method == COMPOSITION_WITH_ALTCHARS
5839                     ? Fstring (len, args) : Fvector (len, args));
5840     }
5841   compose_text (pos, to, components, Qnil, coding->dst_object);
5842 }
5843
5844
5845 /* Put `charset' property on text in CODING->object according to
5846    the annotation data at CHARBUF.  CHARBUF is an array:
5847      [ -LENGTH ANNOTATION_MASK NCHARS CHARSET-ID ]
5848  */
5849
5850 static INLINE void
5851 produce_charset (coding, charbuf, pos)
5852      struct coding_system *coding;
5853      int *charbuf;
5854      EMACS_INT pos;
5855 {
5856   EMACS_INT from = pos - charbuf[2];
5857   struct charset *charset = CHARSET_FROM_ID (charbuf[3]);
5858
5859   Fput_text_property (make_number (from), make_number (pos),
5860                       Qcharset, CHARSET_NAME (charset),
5861                       coding->dst_object);
5862 }
5863
5864
5865 #define CHARBUF_SIZE 0x4000
5866
5867 #define ALLOC_CONVERSION_WORK_AREA(coding)                              \
5868   do {                                                                  \
5869     int size = CHARBUF_SIZE;;                                           \
5870                                                                         \
5871     coding->charbuf = NULL;                                             \
5872     while (size > 1024)                                                 \
5873       {                                                                 \
5874         coding->charbuf = (int *) alloca (sizeof (int) * size);         \
5875         if (coding->charbuf)                                            \
5876           break;                                                        \
5877         size >>= 1;                                                     \
5878       }                                                                 \
5879     if (! coding->charbuf)                                              \
5880       {                                                                 \
5881         record_conversion_result (coding, CODING_RESULT_INSUFFICIENT_MEM); \
5882         return coding->result;                                          \
5883       }                                                                 \
5884     coding->charbuf_size = size;                                        \
5885   } while (0)
5886
5887
5888 static void
5889 produce_annotation (coding, pos)
5890      struct coding_system *coding;
5891      EMACS_INT pos;
5892 {
5893   int *charbuf = coding->charbuf;
5894   int *charbuf_end = charbuf + coding->charbuf_used;
5895
5896   if (NILP (coding->dst_object))
5897     return;
5898
5899   while (charbuf < charbuf_end)
5900     {
5901       if (*charbuf >= 0)
5902         pos += *charbuf++;
5903       else
5904         {
5905           int len = -*charbuf;
5906           switch (charbuf[1])
5907             {
5908             case CODING_ANNOTATE_COMPOSITION_MASK:
5909               produce_composition (coding, charbuf, pos);
5910               break;
5911             case CODING_ANNOTATE_CHARSET_MASK:
5912               produce_charset (coding, charbuf, pos);
5913               break;
5914             default:
5915               abort ();
5916             }
5917           charbuf += len;
5918         }
5919     }
5920 }
5921
5922 /* Decode the data at CODING->src_object into CODING->dst_object.
5923    CODING->src_object is a buffer, a string, or nil.
5924    CODING->dst_object is a buffer.
5925
5926    If CODING->src_object is a buffer, it must be the current buffer.
5927    In this case, if CODING->src_pos is positive, it is a position of
5928    the source text in the buffer, otherwise, the source text is in the
5929    gap area of the buffer, and CODING->src_pos specifies the offset of
5930    the text from GPT (which must be the same as PT).  If this is the
5931    same buffer as CODING->dst_object, CODING->src_pos must be
5932    negative.
5933
5934    If CODING->src_object is a string, CODING->src_pos in an index to
5935    that string.
5936
5937    If CODING->src_object is nil, CODING->source must already point to
5938    the non-relocatable memory area.  In this case, CODING->src_pos is
5939    an offset from CODING->source.
5940
5941    The decoded data is inserted at the current point of the buffer
5942    CODING->dst_object.
5943 */
5944
5945 static int
5946 decode_coding (coding)
5947      struct coding_system *coding;
5948 {
5949   Lisp_Object attrs;
5950   Lisp_Object undo_list;
5951   Lisp_Object translation_table;
5952   int carryover;
5953   int i;
5954
5955   if (BUFFERP (coding->src_object)
5956       && coding->src_pos > 0
5957       && coding->src_pos < GPT
5958       && coding->src_pos + coding->src_chars > GPT)
5959     move_gap_both (coding->src_pos, coding->src_pos_byte);
5960
5961   undo_list = Qt;
5962   if (BUFFERP (coding->dst_object))
5963     {
5964       if (current_buffer != XBUFFER (coding->dst_object))
5965         set_buffer_internal (XBUFFER (coding->dst_object));
5966       if (GPT != PT)
5967         move_gap_both (PT, PT_BYTE);
5968       undo_list = current_buffer->undo_list;
5969       current_buffer->undo_list = Qt;
5970     }
5971
5972   coding->consumed = coding->consumed_char = 0;
5973   coding->produced = coding->produced_char = 0;
5974   coding->chars_at_source = 0;
5975   record_conversion_result (coding, CODING_RESULT_SUCCESS);
5976   coding->errors = 0;
5977
5978   ALLOC_CONVERSION_WORK_AREA (coding);
5979
5980   attrs = CODING_ID_ATTRS (coding->id);
5981   translation_table = get_translation_table (attrs, 0);
5982
5983   carryover = 0;
5984   do
5985     {
5986       EMACS_INT pos = coding->dst_pos + coding->produced_char;
5987
5988       coding_set_source (coding);
5989       coding->annotated = 0;
5990       coding->charbuf_used = carryover;
5991       (*(coding->decoder)) (coding);
5992       coding_set_destination (coding);
5993       carryover = produce_chars (coding, translation_table, 0);
5994       if (coding->annotated)
5995         produce_annotation (coding, pos);
5996       for (i = 0; i < carryover; i++)
5997         coding->charbuf[i]
5998           = coding->charbuf[coding->charbuf_used - carryover + i];
5999     }
6000   while (coding->consumed < coding->src_bytes
6001          && ! coding->result);
6002
6003   if (carryover > 0)
6004     {
6005       coding_set_destination (coding);
6006       coding->charbuf_used = carryover;
6007       produce_chars (coding, translation_table, 1);
6008     }
6009
6010   coding->carryover_bytes = 0;
6011   if (coding->consumed < coding->src_bytes)
6012     {
6013       int nbytes = coding->src_bytes - coding->consumed;
6014       const unsigned char *src;
6015
6016       coding_set_source (coding);
6017       coding_set_destination (coding);
6018       src = coding->source + coding->consumed;
6019
6020       if (coding->mode & CODING_MODE_LAST_BLOCK)
6021         {
6022           /* Flush out unprocessed data as binary chars.  We are sure
6023              that the number of data is less than the size of
6024              coding->charbuf.  */
6025           coding->charbuf_used = 0;
6026           while (nbytes-- > 0)
6027             {
6028               int c = *src++;
6029
6030               coding->charbuf[coding->charbuf_used++] = (c & 0x80 ? - c : c);
6031             }
6032           produce_chars (coding);
6033         }
6034       else
6035         {
6036           /* Record unprocessed bytes in coding->carryover.  We are
6037              sure that the number of data is less than the size of
6038              coding->carryover.  */
6039           unsigned char *p = coding->carryover;
6040
6041           coding->carryover_bytes = nbytes;
6042           while (nbytes-- > 0)
6043             *p++ = *src++;
6044         }
6045       coding->consumed = coding->src_bytes;
6046     }
6047
6048   if (BUFFERP (coding->dst_object))
6049     {
6050       current_buffer->undo_list = undo_list;
6051       record_insert (coding->dst_pos, coding->produced_char);
6052     }
6053   if (! EQ (CODING_ID_EOL_TYPE (coding->id), Qunix))
6054     decode_eol (coding);
6055   return coding->result;
6056 }
6057
6058
6059 /* Extract an annotation datum from a composition starting at POS and
6060    ending before LIMIT of CODING->src_object (buffer or string), store
6061    the data in BUF, set *STOP to a starting position of the next
6062    composition (if any) or to LIMIT, and return the address of the
6063    next element of BUF.
6064
6065    If such an annotation is not found, set *STOP to a starting
6066    position of a composition after POS (if any) or to LIMIT, and
6067    return BUF.  */
6068
6069 static INLINE int *
6070 handle_composition_annotation (pos, limit, coding, buf, stop)
6071      EMACS_INT pos, limit;
6072      struct coding_system *coding;
6073      int *buf;
6074      EMACS_INT *stop;
6075 {
6076   EMACS_INT start, end;
6077   Lisp_Object prop;
6078
6079   if (! find_composition (pos, limit, &start, &end, &prop, coding->src_object)
6080       || end > limit)
6081     *stop = limit;
6082   else if (start > pos)
6083     *stop = start;
6084   else
6085     {
6086       if (start == pos)
6087         {
6088           /* We found a composition.  Store the corresponding
6089              annotation data in BUF.  */
6090           int *head = buf;
6091           enum composition_method method = COMPOSITION_METHOD (prop);
6092           int nchars = COMPOSITION_LENGTH (prop);
6093
6094           ADD_COMPOSITION_DATA (buf, nchars, method);
6095           if (method != COMPOSITION_RELATIVE)
6096             {
6097               Lisp_Object components;
6098               int len, i, i_byte;
6099
6100               components = COMPOSITION_COMPONENTS (prop);
6101               if (VECTORP (components))
6102                 {
6103                   len = XVECTOR (components)->size;
6104                   for (i = 0; i < len; i++)
6105                     *buf++ = XINT (AREF (components, i));
6106                 }
6107               else if (STRINGP (components))
6108                 {
6109                   len = SCHARS (components);
6110                   i = i_byte = 0;
6111                   while (i < len)
6112                     {
6113                       FETCH_STRING_CHAR_ADVANCE (*buf, components, i, i_byte);
6114                       buf++;
6115                     }
6116                 }
6117               else if (INTEGERP (components))
6118                 {
6119                   len = 1;
6120                   *buf++ = XINT (components);
6121                 }
6122               else if (CONSP (components))
6123                 {
6124                   for (len = 0; CONSP (components);
6125                        len++, components = XCDR (components))
6126                     *buf++ = XINT (XCAR (components));
6127                 }
6128               else
6129                 abort ();
6130               *head -= len;
6131             }
6132         }
6133
6134       if (find_composition (end, limit, &start, &end, &prop,
6135                             coding->src_object)
6136           && end <= limit)
6137         *stop = start;
6138       else
6139         *stop = limit;
6140     }
6141   return buf;
6142 }
6143
6144
6145 /* Extract an annotation datum from a text property `charset' at POS of
6146    CODING->src_object (buffer of string), store the data in BUF, set
6147    *STOP to the position where the value of `charset' property changes
6148    (limiting by LIMIT), and return the address of the next element of
6149    BUF.
6150
6151    If the property value is nil, set *STOP to the position where the
6152    property value is non-nil (limiting by LIMIT), and return BUF.  */
6153
6154 static INLINE int *
6155 handle_charset_annotation (pos, limit, coding, buf, stop)
6156      EMACS_INT pos, limit;
6157      struct coding_system *coding;
6158      int *buf;
6159      EMACS_INT *stop;
6160 {
6161   Lisp_Object val, next;
6162   int id;
6163
6164   val = Fget_text_property (make_number (pos), Qcharset, coding->src_object);
6165   if (! NILP (val) && CHARSETP (val))
6166     id = XINT (CHARSET_SYMBOL_ID (val));
6167   else
6168     id = -1;
6169   ADD_CHARSET_DATA (buf, 0, id);
6170   next = Fnext_single_property_change (make_number (pos), Qcharset,
6171                                        coding->src_object,
6172                                        make_number (limit));
6173   *stop = XINT (next);
6174   return buf;
6175 }
6176
6177
6178 static void
6179 consume_chars (coding, translation_table)
6180      struct coding_system *coding;
6181      Lisp_Object translation_table;
6182 {
6183   int *buf = coding->charbuf;
6184   int *buf_end = coding->charbuf + coding->charbuf_size;
6185   const unsigned char *src = coding->source + coding->consumed;
6186   const unsigned char *src_end = coding->source + coding->src_bytes;
6187   EMACS_INT pos = coding->src_pos + coding->consumed_char;
6188   EMACS_INT end_pos = coding->src_pos + coding->src_chars;
6189   int multibytep = coding->src_multibyte;
6190   Lisp_Object eol_type;
6191   int c;
6192   EMACS_INT stop, stop_composition, stop_charset;
6193   int max_lookup = 0, *lookup_buf = NULL;
6194
6195   if (! NILP (translation_table))
6196     {
6197       max_lookup = XINT (XCHAR_TABLE (translation_table)->extras[1]);
6198       lookup_buf = alloca (sizeof (int) * max_lookup);
6199     }
6200
6201   eol_type = CODING_ID_EOL_TYPE (coding->id);
6202   if (VECTORP (eol_type))
6203     eol_type = Qunix;
6204
6205   /* Note: composition handling is not yet implemented.  */
6206   coding->common_flags &= ~CODING_ANNOTATE_COMPOSITION_MASK;
6207
6208   if (NILP (coding->src_object))
6209     stop = stop_composition = stop_charset = end_pos;
6210   else
6211     {
6212       if (coding->common_flags & CODING_ANNOTATE_COMPOSITION_MASK)
6213         stop = stop_composition = pos;
6214       else
6215         stop = stop_composition = end_pos;
6216       if (coding->common_flags & CODING_ANNOTATE_CHARSET_MASK)
6217         stop = stop_charset = pos;
6218       else
6219         stop_charset = end_pos;
6220     }
6221
6222   /* Compensate for CRLF and conversion.  */
6223   buf_end -= 1 + MAX_ANNOTATION_LENGTH;
6224   while (buf < buf_end)
6225     {
6226       Lisp_Object trans;
6227
6228       if (pos == stop)
6229         {
6230           if (pos == end_pos)
6231             break;
6232           if (pos == stop_composition)
6233             buf = handle_composition_annotation (pos, end_pos, coding,
6234                                                  buf, &stop_composition);
6235           if (pos == stop_charset)
6236             buf = handle_charset_annotation (pos, end_pos, coding,
6237                                              buf, &stop_charset);
6238           stop = (stop_composition < stop_charset
6239                   ? stop_composition : stop_charset);
6240         }
6241
6242       if (! multibytep)
6243         {
6244           EMACS_INT bytes;
6245
6246           if (! CODING_FOR_UNIBYTE (coding)
6247               && (bytes = MULTIBYTE_LENGTH (src, src_end)) > 0)
6248             c = STRING_CHAR_ADVANCE (src), pos += bytes;
6249           else
6250             c = *src++, pos++;
6251         }
6252       else
6253         c = STRING_CHAR_ADVANCE (src), pos++;
6254       if ((c == '\r') && (coding->mode & CODING_MODE_SELECTIVE_DISPLAY))
6255         c = '\n';
6256       if (! EQ (eol_type, Qunix))
6257         {
6258           if (c == '\n')
6259             {
6260               if (EQ (eol_type, Qdos))
6261                 *buf++ = '\r';
6262               else
6263                 c = '\r';
6264             }
6265         }
6266
6267       if (NILP (translation_table)
6268           || NILP (trans = CHAR_TABLE_REF (translation_table, c)))
6269         *buf++ = c;
6270       else
6271         {
6272           int from_nchars = 1, to_nchars = 1;
6273           int *lookup_buf_end;
6274           const unsigned char *p = src;
6275           int i;
6276
6277           lookup_buf[0] = c;
6278           for (i = 1; i < max_lookup && p < src_end; i++)
6279             lookup_buf[i] = STRING_CHAR_ADVANCE (p);
6280           lookup_buf_end = lookup_buf + i;
6281           trans = get_translation (trans, lookup_buf, lookup_buf_end, 1,
6282                                    &from_nchars, &to_nchars);
6283           if (EQ (trans, Qt)
6284               || buf + to_nchars > buf_end)
6285             break;
6286           *buf++ = *lookup_buf;
6287           for (i = 1; i < to_nchars; i++)
6288             *buf++ = XINT (AREF (trans, i));
6289           for (i = 1; i < from_nchars; i++, pos++)
6290             src += MULTIBYTE_LENGTH_NO_CHECK (src);
6291         }
6292     }
6293
6294   coding->consumed = src - coding->source;
6295   coding->consumed_char = pos - coding->src_pos;
6296   coding->charbuf_used = buf - coding->charbuf;
6297   coding->chars_at_source = 0;
6298 }
6299
6300
6301 /* Encode the text at CODING->src_object into CODING->dst_object.
6302    CODING->src_object is a buffer or a string.
6303    CODING->dst_object is a buffer or nil.
6304
6305    If CODING->src_object is a buffer, it must be the current buffer.
6306    In this case, if CODING->src_pos is positive, it is a position of
6307    the source text in the buffer, otherwise. the source text is in the
6308    gap area of the buffer, and coding->src_pos specifies the offset of
6309    the text from GPT (which must be the same as PT).  If this is the
6310    same buffer as CODING->dst_object, CODING->src_pos must be
6311    negative and CODING should not have `pre-write-conversion'.
6312
6313    If CODING->src_object is a string, CODING should not have
6314    `pre-write-conversion'.
6315
6316    If CODING->dst_object is a buffer, the encoded data is inserted at
6317    the current point of that buffer.
6318
6319    If CODING->dst_object is nil, the encoded data is placed at the
6320    memory area specified by CODING->destination.  */
6321
6322 static int
6323 encode_coding (coding)
6324      struct coding_system *coding;
6325 {
6326   Lisp_Object attrs;
6327   Lisp_Object translation_table;
6328
6329   attrs = CODING_ID_ATTRS (coding->id);
6330   translation_table = get_translation_table (attrs, 1);
6331
6332   if (BUFFERP (coding->dst_object))
6333     {
6334       set_buffer_internal (XBUFFER (coding->dst_object));
6335       coding->dst_multibyte
6336         = ! NILP (current_buffer->enable_multibyte_characters);
6337     }
6338
6339   coding->consumed = coding->consumed_char = 0;
6340   coding->produced = coding->produced_char = 0;
6341   record_conversion_result (coding, CODING_RESULT_SUCCESS);
6342   coding->errors = 0;
6343
6344   ALLOC_CONVERSION_WORK_AREA (coding);
6345
6346   do {
6347     coding_set_source (coding);
6348     consume_chars (coding, translation_table);
6349     coding_set_destination (coding);
6350     (*(coding->encoder)) (coding);
6351   } while (coding->consumed_char < coding->src_chars);
6352
6353   if (BUFFERP (coding->dst_object))
6354     insert_from_gap (coding->produced_char, coding->produced);
6355
6356   return (coding->result);
6357 }
6358
6359
6360 /* Name (or base name) of work buffer for code conversion.  */
6361 static Lisp_Object Vcode_conversion_workbuf_name;
6362
6363 /* A working buffer used by the top level conversion.  Once it is
6364    created, it is never destroyed.  It has the name
6365    Vcode_conversion_workbuf_name.  The other working buffers are
6366    destroyed after the use is finished, and their names are modified
6367    versions of Vcode_conversion_workbuf_name.  */
6368 static Lisp_Object Vcode_conversion_reused_workbuf;
6369
6370 /* 1 iff Vcode_conversion_reused_workbuf is already in use.  */
6371 static int reused_workbuf_in_use;
6372
6373
6374 /* Return a working buffer of code convesion.  MULTIBYTE specifies the
6375    multibyteness of returning buffer.  */
6376
6377 Lisp_Object
6378 make_conversion_work_buffer (multibyte)
6379 {
6380   Lisp_Object name, workbuf;
6381   struct buffer *current;
6382
6383   if (reused_workbuf_in_use++)
6384     {
6385       name = Fgenerate_new_buffer_name (Vcode_conversion_workbuf_name, Qnil);
6386       workbuf = Fget_buffer_create (name);
6387     }
6388   else
6389     {
6390       name = Vcode_conversion_workbuf_name;
6391       workbuf = Fget_buffer_create (name);
6392       if (NILP (Vcode_conversion_reused_workbuf))
6393         Vcode_conversion_reused_workbuf = workbuf;
6394     }
6395   current = current_buffer;
6396   set_buffer_internal (XBUFFER (workbuf));
6397   Ferase_buffer ();
6398   current_buffer->undo_list = Qt;
6399   current_buffer->enable_multibyte_characters = multibyte ? Qt : Qnil;
6400   set_buffer_internal (current);
6401   return workbuf;
6402 }
6403
6404
6405 static Lisp_Object
6406 code_conversion_restore (arg)
6407      Lisp_Object arg;
6408 {
6409   Lisp_Object current, workbuf;
6410
6411   current = XCAR (arg);
6412   workbuf = XCDR (arg);
6413   if (! NILP (workbuf))
6414     {
6415       if (EQ (workbuf, Vcode_conversion_reused_workbuf))
6416         reused_workbuf_in_use = 0;
6417       else if (! NILP (Fbuffer_live_p (workbuf)))
6418         Fkill_buffer (workbuf);
6419     }
6420   set_buffer_internal (XBUFFER (current));
6421   return Qnil;
6422 }
6423
6424 Lisp_Object
6425 code_conversion_save (with_work_buf, multibyte)
6426      int with_work_buf, multibyte;
6427 {
6428   Lisp_Object workbuf = Qnil;
6429
6430   if (with_work_buf)
6431     workbuf = make_conversion_work_buffer (multibyte);
6432   record_unwind_protect (code_conversion_restore,
6433                          Fcons (Fcurrent_buffer (), workbuf));
6434   return workbuf;
6435 }
6436
6437 int
6438 decode_coding_gap (coding, chars, bytes)
6439      struct coding_system *coding;
6440      EMACS_INT chars, bytes;
6441 {
6442   int count = specpdl_ptr - specpdl;
6443   Lisp_Object attrs;
6444
6445   code_conversion_save (0, 0);
6446
6447   coding->src_object = Fcurrent_buffer ();
6448   coding->src_chars = chars;
6449   coding->src_bytes = bytes;
6450   coding->src_pos = -chars;
6451   coding->src_pos_byte = -bytes;
6452   coding->src_multibyte = chars < bytes;
6453   coding->dst_object = coding->src_object;
6454   coding->dst_pos = PT;
6455   coding->dst_pos_byte = PT_BYTE;
6456   coding->dst_multibyte = ! NILP (current_buffer->enable_multibyte_characters);
6457   coding->mode |= CODING_MODE_LAST_BLOCK;
6458
6459   if (CODING_REQUIRE_DETECTION (coding))
6460     detect_coding (coding);
6461
6462   decode_coding (coding);
6463
6464   attrs = CODING_ID_ATTRS (coding->id);
6465   if (! NILP (CODING_ATTR_POST_READ (attrs)))
6466     {
6467       EMACS_INT prev_Z = Z, prev_Z_BYTE = Z_BYTE;
6468       Lisp_Object val;
6469
6470       TEMP_SET_PT_BOTH (coding->dst_pos, coding->dst_pos_byte);
6471       val = call1 (CODING_ATTR_POST_READ (attrs),
6472                    make_number (coding->produced_char));
6473       CHECK_NATNUM (val);
6474       coding->produced_char += Z - prev_Z;
6475       coding->produced += Z_BYTE - prev_Z_BYTE;
6476     }
6477
6478   unbind_to (count, Qnil);
6479   return coding->result;
6480 }
6481
6482 int
6483 encode_coding_gap (coding, chars, bytes)
6484      struct coding_system *coding;
6485      EMACS_INT chars, bytes;
6486 {
6487   int count = specpdl_ptr - specpdl;
6488
6489   code_conversion_save (0, 0);
6490
6491   coding->src_object = Fcurrent_buffer ();
6492   coding->src_chars = chars;
6493   coding->src_bytes = bytes;
6494   coding->src_pos = -chars;
6495   coding->src_pos_byte = -bytes;
6496   coding->src_multibyte = chars < bytes;
6497   coding->dst_object = coding->src_object;
6498   coding->dst_pos = PT;
6499   coding->dst_pos_byte = PT_BYTE;
6500
6501   encode_coding (coding);
6502
6503   unbind_to (count, Qnil);
6504   return coding->result;
6505 }
6506
6507
6508 /* Decode the text in the range FROM/FROM_BYTE and TO/TO_BYTE in
6509    SRC_OBJECT into DST_OBJECT by coding context CODING.
6510
6511    SRC_OBJECT is a buffer, a string, or Qnil.
6512
6513    If it is a buffer, the text is at point of the buffer.  FROM and TO
6514    are positions in the buffer.
6515
6516    If it is a string, the text is at the beginning of the string.
6517    FROM and TO are indices to the string.
6518
6519    If it is nil, the text is at coding->source.  FROM and TO are
6520    indices to coding->source.
6521
6522    DST_OBJECT is a buffer, Qt, or Qnil.
6523
6524    If it is a buffer, the decoded text is inserted at point of the
6525    buffer.  If the buffer is the same as SRC_OBJECT, the source text
6526    is deleted.
6527
6528    If it is Qt, a string is made from the decoded text, and
6529    set in CODING->dst_object.
6530
6531    If it is Qnil, the decoded text is stored at CODING->destination.
6532    The caller must allocate CODING->dst_bytes bytes at
6533    CODING->destination by xmalloc.  If the decoded text is longer than
6534    CODING->dst_bytes, CODING->destination is relocated by xrealloc.
6535  */
6536
6537 void
6538 decode_coding_object (coding, src_object, from, from_byte, to, to_byte,
6539                       dst_object)
6540      struct coding_system *coding;
6541      Lisp_Object src_object;
6542      EMACS_INT from, from_byte, to, to_byte;
6543      Lisp_Object dst_object;
6544 {
6545   int count = specpdl_ptr - specpdl;
6546   unsigned char *destination;
6547   EMACS_INT dst_bytes;
6548   EMACS_INT chars = to - from;
6549   EMACS_INT bytes = to_byte - from_byte;
6550   Lisp_Object attrs;
6551   Lisp_Object buffer;
6552   int saved_pt = -1, saved_pt_byte;
6553
6554   buffer = Fcurrent_buffer ();
6555
6556   if (NILP (dst_object))
6557     {
6558       destination = coding->destination;
6559       dst_bytes = coding->dst_bytes;
6560     }
6561
6562   coding->src_object = src_object;
6563   coding->src_chars = chars;
6564   coding->src_bytes = bytes;
6565   coding->src_multibyte = chars < bytes;
6566
6567   if (STRINGP (src_object))
6568     {
6569       coding->src_pos = from;
6570       coding->src_pos_byte = from_byte;
6571     }
6572   else if (BUFFERP (src_object))
6573     {
6574       set_buffer_internal (XBUFFER (src_object));
6575       if (from != GPT)
6576         move_gap_both (from, from_byte);
6577       if (EQ (src_object, dst_object))
6578         {
6579           saved_pt = PT, saved_pt_byte = PT_BYTE;
6580           TEMP_SET_PT_BOTH (from, from_byte);
6581           del_range_both (from, from_byte, to, to_byte, 1);
6582           coding->src_pos = -chars;
6583           coding->src_pos_byte = -bytes;
6584         }
6585       else
6586         {
6587           coding->src_pos = from;
6588           coding->src_pos_byte = from_byte;
6589         }
6590     }
6591
6592   if (CODING_REQUIRE_DETECTION (coding))
6593     detect_coding (coding);
6594   attrs = CODING_ID_ATTRS (coding->id);
6595
6596   if (EQ (dst_object, Qt)
6597       || (! NILP (CODING_ATTR_POST_READ (attrs))
6598           && NILP (dst_object)))
6599     {
6600       coding->dst_object = code_conversion_save (1, 1);
6601       coding->dst_pos = BEG;
6602       coding->dst_pos_byte = BEG_BYTE;
6603       coding->dst_multibyte = 1;
6604     }
6605   else if (BUFFERP (dst_object))
6606     {
6607       code_conversion_save (0, 0);
6608       coding->dst_object = dst_object;
6609       coding->dst_pos = BUF_PT (XBUFFER (dst_object));
6610       coding->dst_pos_byte = BUF_PT_BYTE (XBUFFER (dst_object));
6611       coding->dst_multibyte
6612         = ! NILP (XBUFFER (dst_object)->enable_multibyte_characters);
6613     }
6614   else
6615     {
6616       code_conversion_save (0, 0);
6617       coding->dst_object = Qnil;
6618       coding->dst_multibyte = 1;
6619     }
6620
6621   decode_coding (coding);
6622
6623   if (BUFFERP (coding->dst_object))
6624     set_buffer_internal (XBUFFER (coding->dst_object));
6625
6626   if (! NILP (CODING_ATTR_POST_READ (attrs)))
6627     {
6628       struct gcpro gcpro1, gcpro2;
6629       EMACS_INT prev_Z = Z, prev_Z_BYTE = Z_BYTE;
6630       Lisp_Object val;
6631
6632       TEMP_SET_PT_BOTH (coding->dst_pos, coding->dst_pos_byte);
6633       GCPRO2 (coding->src_object, coding->dst_object);
6634       val = call1 (CODING_ATTR_POST_READ (attrs),
6635                    make_number (coding->produced_char));
6636       UNGCPRO;
6637       CHECK_NATNUM (val);
6638       coding->produced_char += Z - prev_Z;
6639       coding->produced += Z_BYTE - prev_Z_BYTE;
6640     }
6641
6642   if (EQ (dst_object, Qt))
6643     {
6644       coding->dst_object = Fbuffer_string ();
6645     }
6646   else if (NILP (dst_object) && BUFFERP (coding->dst_object))
6647     {
6648       set_buffer_internal (XBUFFER (coding->dst_object));
6649       if (dst_bytes < coding->produced)
6650         {
6651           destination
6652             = (unsigned char *) xrealloc (destination, coding->produced);
6653           if (! destination)
6654             {
6655               record_conversion_result (coding,
6656                                         CODING_RESULT_INSUFFICIENT_DST);
6657               unbind_to (count, Qnil);
6658               return;
6659             }
6660           if (BEGV < GPT && GPT < BEGV + coding->produced_char)
6661             move_gap_both (BEGV, BEGV_BYTE);
6662           bcopy (BEGV_ADDR, destination, coding->produced);
6663           coding->destination = destination;
6664         }
6665     }
6666
6667   if (saved_pt >= 0)
6668     {
6669       /* This is the case of:
6670          (BUFFERP (src_object) && EQ (src_object, dst_object))
6671          As we have moved PT while replacing the original buffer
6672          contents, we must recover it now.  */
6673       set_buffer_internal (XBUFFER (src_object));
6674       if (saved_pt < from)
6675         TEMP_SET_PT_BOTH (saved_pt, saved_pt_byte);
6676       else if (saved_pt < from + chars)
6677         TEMP_SET_PT_BOTH (from, from_byte);
6678       else if (! NILP (current_buffer->enable_multibyte_characters))
6679         TEMP_SET_PT_BOTH (saved_pt + (coding->produced_char - chars),
6680                           saved_pt_byte + (coding->produced - bytes));
6681       else
6682         TEMP_SET_PT_BOTH (saved_pt + (coding->produced - bytes),
6683                           saved_pt_byte + (coding->produced - bytes));
6684     }
6685
6686   unbind_to (count, coding->dst_object);
6687 }
6688
6689
6690 void
6691 encode_coding_object (coding, src_object, from, from_byte, to, to_byte,
6692                       dst_object)
6693      struct coding_system *coding;
6694      Lisp_Object src_object;
6695      EMACS_INT from, from_byte, to, to_byte;
6696      Lisp_Object dst_object;
6697 {
6698   int count = specpdl_ptr - specpdl;
6699   EMACS_INT chars = to - from;
6700   EMACS_INT bytes = to_byte - from_byte;
6701   Lisp_Object attrs;
6702   Lisp_Object buffer;
6703   int saved_pt = -1, saved_pt_byte;
6704
6705   buffer = Fcurrent_buffer ();
6706
6707   coding->src_object = src_object;
6708   coding->src_chars = chars;
6709   coding->src_bytes = bytes;
6710   coding->src_multibyte = chars < bytes;
6711
6712   attrs = CODING_ID_ATTRS (coding->id);
6713
6714   if (! NILP (CODING_ATTR_PRE_WRITE (attrs)))
6715     {
6716       coding->src_object = code_conversion_save (1, coding->src_multibyte);
6717       set_buffer_internal (XBUFFER (coding->src_object));
6718       if (STRINGP (src_object))
6719         insert_from_string (src_object, from, from_byte, chars, bytes, 0);
6720       else if (BUFFERP (src_object))
6721         insert_from_buffer (XBUFFER (src_object), from, chars, 0);
6722       else
6723         insert_1_both (coding->source + from, chars, bytes, 0, 0, 0);
6724
6725       if (EQ (src_object, dst_object))
6726         {
6727           set_buffer_internal (XBUFFER (src_object));
6728           saved_pt = PT, saved_pt_byte = PT_BYTE;
6729           del_range_both (from, from_byte, to, to_byte, 1);
6730           set_buffer_internal (XBUFFER (coding->src_object));
6731         }
6732
6733       call2 (CODING_ATTR_PRE_WRITE (attrs),
6734              make_number (BEG), make_number (Z));
6735       coding->src_object = Fcurrent_buffer ();
6736       if (BEG != GPT)
6737         move_gap_both (BEG, BEG_BYTE);
6738       coding->src_chars = Z - BEG;
6739       coding->src_bytes = Z_BYTE - BEG_BYTE;
6740       coding->src_pos = BEG;
6741       coding->src_pos_byte = BEG_BYTE;
6742       coding->src_multibyte = Z < Z_BYTE;
6743     }
6744   else if (STRINGP (src_object))
6745     {
6746       code_conversion_save (0, 0);
6747       coding->src_pos = from;
6748       coding->src_pos_byte = from_byte;
6749     }
6750   else if (BUFFERP (src_object))
6751     {
6752       code_conversion_save (0, 0);
6753       set_buffer_internal (XBUFFER (src_object));
6754       if (EQ (src_object, dst_object))
6755         {
6756           saved_pt = PT, saved_pt_byte = PT_BYTE;
6757           coding->src_object = del_range_1 (from, to, 1, 1);
6758           coding->src_pos = 0;
6759           coding->src_pos_byte = 0;
6760         }
6761       else
6762         {
6763           if (from < GPT && to >= GPT)
6764             move_gap_both (from, from_byte);
6765           coding->src_pos = from;
6766           coding->src_pos_byte = from_byte;
6767         }
6768     }
6769   else
6770     code_conversion_save (0, 0);
6771
6772   if (BUFFERP (dst_object))
6773     {
6774       coding->dst_object = dst_object;
6775       if (EQ (src_object, dst_object))
6776         {
6777           coding->dst_pos = from;
6778           coding->dst_pos_byte = from_byte;
6779         }
6780       else
6781         {
6782           coding->dst_pos = BUF_PT (XBUFFER (dst_object));
6783           coding->dst_pos_byte = BUF_PT_BYTE (XBUFFER (dst_object));
6784         }
6785       coding->dst_multibyte
6786         = ! NILP (XBUFFER (dst_object)->enable_multibyte_characters);
6787     }
6788   else if (EQ (dst_object, Qt))
6789     {
6790       coding->dst_object = Qnil;
6791       coding->dst_bytes = coding->src_chars;
6792       if (coding->dst_bytes == 0)
6793         coding->dst_bytes = 1;
6794       coding->destination = (unsigned char *) xmalloc (coding->dst_bytes);
6795       coding->dst_multibyte = 0;
6796     }
6797   else
6798     {
6799       coding->dst_object = Qnil;
6800       coding->dst_multibyte = 0;
6801     }
6802
6803   encode_coding (coding);
6804
6805   if (EQ (dst_object, Qt))
6806     {
6807       if (BUFFERP (coding->dst_object))
6808         coding->dst_object = Fbuffer_string ();
6809       else
6810         {
6811           coding->dst_object
6812             = make_unibyte_string ((char *) coding->destination,
6813                                    coding->produced);
6814           xfree (coding->destination);
6815         }
6816     }
6817
6818   if (saved_pt >= 0)
6819     {
6820       /* This is the case of:
6821          (BUFFERP (src_object) && EQ (src_object, dst_object))
6822          As we have moved PT while replacing the original buffer
6823          contents, we must recover it now.  */
6824       set_buffer_internal (XBUFFER (src_object));
6825       if (saved_pt < from)
6826         TEMP_SET_PT_BOTH (saved_pt, saved_pt_byte);
6827       else if (saved_pt < from + chars)
6828         TEMP_SET_PT_BOTH (from, from_byte);
6829       else if (! NILP (current_buffer->enable_multibyte_characters))
6830         TEMP_SET_PT_BOTH (saved_pt + (coding->produced_char - chars),
6831                           saved_pt_byte + (coding->produced - bytes));
6832       else
6833         TEMP_SET_PT_BOTH (saved_pt + (coding->produced - bytes),
6834                           saved_pt_byte + (coding->produced - bytes));
6835     }
6836
6837   unbind_to (count, Qnil);
6838 }
6839
6840
6841 Lisp_Object
6842 preferred_coding_system ()
6843 {
6844   int id = coding_categories[coding_priorities[0]].id;
6845
6846   return CODING_ID_NAME (id);
6847 }
6848
6849 \f
6850 #ifdef emacs
6851 /*** 8. Emacs Lisp library functions ***/
6852
6853 DEFUN ("coding-system-p", Fcoding_system_p, Scoding_system_p, 1, 1, 0,
6854        doc: /* Return t if OBJECT is nil or a coding-system.
6855 See the documentation of `define-coding-system' for information
6856 about coding-system objects.  */)
6857      (obj)
6858      Lisp_Object obj;
6859 {
6860   return ((NILP (obj) || CODING_SYSTEM_P (obj)) ? Qt : Qnil);
6861 }
6862
6863 DEFUN ("read-non-nil-coding-system", Fread_non_nil_coding_system,
6864        Sread_non_nil_coding_system, 1, 1, 0,
6865        doc: /* Read a coding system from the minibuffer, prompting with string PROMPT.  */)
6866      (prompt)
6867      Lisp_Object prompt;
6868 {
6869   Lisp_Object val;
6870   do
6871     {
6872       val = Fcompleting_read (prompt, Vcoding_system_alist, Qnil,
6873                               Qt, Qnil, Qcoding_system_history, Qnil, Qnil);
6874     }
6875   while (SCHARS (val) == 0);
6876   return (Fintern (val, Qnil));
6877 }
6878
6879 DEFUN ("read-coding-system", Fread_coding_system, Sread_coding_system, 1, 2, 0,
6880        doc: /* Read a coding system from the minibuffer, prompting with string PROMPT.
6881 If the user enters null input, return second argument DEFAULT-CODING-SYSTEM.  */)
6882      (prompt, default_coding_system)
6883      Lisp_Object prompt, default_coding_system;
6884 {
6885   Lisp_Object val;
6886   if (SYMBOLP (default_coding_system))
6887     XSETSTRING (default_coding_system, XPNTR (SYMBOL_NAME (default_coding_system)));
6888   val = Fcompleting_read (prompt, Vcoding_system_alist, Qnil,
6889                           Qt, Qnil, Qcoding_system_history,
6890                           default_coding_system, Qnil);
6891   return (SCHARS (val) == 0 ? Qnil : Fintern (val, Qnil));
6892 }
6893
6894 DEFUN ("check-coding-system", Fcheck_coding_system, Scheck_coding_system,
6895        1, 1, 0,
6896        doc: /* Check validity of CODING-SYSTEM.
6897 If valid, return CODING-SYSTEM, else signal a `coding-system-error' error.  */)
6898   (coding_system)
6899      Lisp_Object coding_system;
6900 {
6901   CHECK_SYMBOL (coding_system);
6902   if (!NILP (Fcoding_system_p (coding_system)))
6903     return coding_system;
6904   while (1)
6905     Fsignal (Qcoding_system_error, Fcons (coding_system, Qnil));
6906 }
6907
6908 \f
6909 /* Detect how the bytes at SRC of length SRC_BYTES are encoded.  If
6910    HIGHEST is nonzero, return the coding system of the highest
6911    priority among the detected coding systems.  Otherwize return a
6912    list of detected coding systems sorted by their priorities.  If
6913    MULTIBYTEP is nonzero, it is assumed that the bytes are in correct
6914    multibyte form but contains only ASCII and eight-bit chars.
6915    Otherwise, the bytes are raw bytes.
6916
6917    CODING-SYSTEM controls the detection as below:
6918
6919    If it is nil, detect both text-format and eol-format.  If the
6920    text-format part of CODING-SYSTEM is already specified
6921    (e.g. `iso-latin-1'), detect only eol-format.  If the eol-format
6922    part of CODING-SYSTEM is already specified (e.g. `undecided-unix'),
6923    detect only text-format.  */
6924
6925 Lisp_Object
6926 detect_coding_system (src, src_chars, src_bytes, highest, multibytep,
6927                       coding_system)
6928      const unsigned char *src;
6929      int src_chars, src_bytes, highest;
6930      int multibytep;
6931      Lisp_Object coding_system;
6932 {
6933   const unsigned char *src_end = src + src_bytes;
6934   Lisp_Object attrs, eol_type;
6935   Lisp_Object val;
6936   struct coding_system coding;
6937   int id;
6938   struct coding_detection_info detect_info;
6939   enum coding_category base_category;
6940
6941   if (NILP (coding_system))
6942     coding_system = Qundecided;
6943   setup_coding_system (coding_system, &coding);
6944   attrs = CODING_ID_ATTRS (coding.id);
6945   eol_type = CODING_ID_EOL_TYPE (coding.id);
6946   coding_system = CODING_ATTR_BASE_NAME (attrs);
6947
6948   coding.source = src;
6949   coding.src_chars = src_chars;
6950   coding.src_bytes = src_bytes;
6951   coding.src_multibyte = multibytep;
6952   coding.consumed = 0;
6953   coding.mode |= CODING_MODE_LAST_BLOCK;
6954
6955   detect_info.checked = detect_info.found = detect_info.rejected = 0;
6956
6957   /* At first, detect text-format if necessary.  */
6958   base_category = XINT (CODING_ATTR_CATEGORY (attrs));
6959   if (base_category == coding_category_undecided)
6960     {
6961       enum coding_category category;
6962       struct coding_system *this;
6963       int c, i;
6964
6965       /* Skip all ASCII bytes except for a few ISO2022 controls.  */
6966       for (i = 0; src < src_end; i++, src++)
6967         {
6968           c = *src;
6969           if (c & 0x80 || (c < 0x20 && (c == ISO_CODE_ESC
6970                                         || c == ISO_CODE_SI
6971                                         || c == ISO_CODE_SO)))
6972             break;
6973         }
6974       coding.head_ascii = src - coding.source;
6975
6976       if (src < src_end)
6977         for (i = 0; i < coding_category_raw_text; i++)
6978           {
6979             category = coding_priorities[i];
6980             this = coding_categories + category;
6981
6982             if (this->id < 0)
6983               {
6984                 /* No coding system of this category is defined.  */
6985                 detect_info.rejected |= (1 << category);
6986               }
6987             else if (category >= coding_category_raw_text)
6988               continue;
6989             else if (detect_info.checked & (1 << category))
6990               {
6991                 if (highest
6992                     && (detect_info.found & (1 << category)))
6993                   break;
6994               }
6995             else
6996               {
6997                 if ((*(this->detector)) (&coding, &detect_info)
6998                     && highest
6999                     && (detect_info.found & (1 << category)))
7000                   {
7001                     if (category == coding_category_utf_16_auto)
7002                       {
7003                         if (detect_info.found & CATEGORY_MASK_UTF_16_LE)
7004                           category = coding_category_utf_16_le;
7005                         else
7006                           category = coding_category_utf_16_be;
7007                       }
7008                     break;
7009                   }
7010               }
7011           }
7012
7013       if (detect_info.rejected == CATEGORY_MASK_ANY)
7014         {
7015           detect_info.found = CATEGORY_MASK_RAW_TEXT;
7016           id = coding_categories[coding_category_raw_text].id;
7017           val = Fcons (make_number (id), Qnil);
7018         }
7019       else if (! detect_info.rejected && ! detect_info.found)
7020         {
7021           detect_info.found = CATEGORY_MASK_ANY;
7022           id = coding_categories[coding_category_undecided].id;
7023           val = Fcons (make_number (id), Qnil);
7024         }
7025       else if (highest)
7026         {
7027           if (detect_info.found)
7028             {
7029               detect_info.found = 1 << category;
7030               val = Fcons (make_number (this->id), Qnil);
7031             }
7032           else
7033             for (i = 0; i < coding_category_raw_text; i++)
7034               if (! (detect_info.rejected & (1 << coding_priorities[i])))
7035                 {
7036                   detect_info.found = 1 << coding_priorities[i];
7037                   id = coding_categories[coding_priorities[i]].id;
7038                   val = Fcons (make_number (id), Qnil);
7039                   break;
7040                 }
7041         }
7042       else
7043         {
7044           int mask = detect_info.rejected | detect_info.found;
7045           int found = 0;
7046           val = Qnil;
7047
7048           for (i = coding_category_raw_text - 1; i >= 0; i--)
7049             {
7050               category = coding_priorities[i];
7051               if (! (mask & (1 << category)))
7052                 {
7053                   found |= 1 << category;
7054                   id = coding_categories[category].id;
7055                   val = Fcons (make_number (id), val);
7056                 }
7057             }
7058           for (i = coding_category_raw_text - 1; i >= 0; i--)
7059             {
7060               category = coding_priorities[i];
7061               if (detect_info.found & (1 << category))
7062                 {
7063                   id = coding_categories[category].id;
7064                   val = Fcons (make_number (id), val);
7065                 }
7066             }
7067           detect_info.found |= found;
7068         }
7069     }
7070   else if (base_category == coding_category_utf_16_auto)
7071     {
7072       if (detect_coding_utf_16 (&coding, &detect_info))
7073         {
7074           enum coding_category category;
7075           struct coding_system *this;
7076
7077           if (detect_info.found & CATEGORY_MASK_UTF_16_LE)
7078             this = coding_categories + coding_category_utf_16_le;
7079           else if (detect_info.found & CATEGORY_MASK_UTF_16_BE)
7080             this = coding_categories + coding_category_utf_16_be;
7081           else if (detect_info.rejected & CATEGORY_MASK_UTF_16_LE_NOSIG)
7082             this = coding_categories + coding_category_utf_16_be_nosig;
7083           else
7084             this = coding_categories + coding_category_utf_16_le_nosig;
7085           val = Fcons (make_number (this->id), Qnil);
7086         }
7087     }
7088   else
7089     {
7090       detect_info.found = 1 << XINT (CODING_ATTR_CATEGORY (attrs));
7091       val = Fcons (make_number (coding.id), Qnil);
7092     }
7093
7094   /* Then, detect eol-format if necessary.  */
7095   {
7096     int normal_eol = -1, utf_16_be_eol = -1, utf_16_le_eol;
7097     Lisp_Object tail;
7098
7099     if (VECTORP (eol_type))
7100       {
7101         if (detect_info.found & ~CATEGORY_MASK_UTF_16)
7102           normal_eol = detect_eol (coding.source, src_bytes,
7103                                    coding_category_raw_text);
7104         if (detect_info.found & (CATEGORY_MASK_UTF_16_BE
7105                                  | CATEGORY_MASK_UTF_16_BE_NOSIG))
7106           utf_16_be_eol = detect_eol (coding.source, src_bytes,
7107                                       coding_category_utf_16_be);
7108         if (detect_info.found & (CATEGORY_MASK_UTF_16_LE
7109                                  | CATEGORY_MASK_UTF_16_LE_NOSIG))
7110           utf_16_le_eol = detect_eol (coding.source, src_bytes,
7111                                       coding_category_utf_16_le);
7112       }
7113     else
7114       {
7115         if (EQ (eol_type, Qunix))
7116           normal_eol = utf_16_be_eol = utf_16_le_eol = EOL_SEEN_LF;
7117         else if (EQ (eol_type, Qdos))
7118           normal_eol = utf_16_be_eol = utf_16_le_eol = EOL_SEEN_CRLF;
7119         else
7120           normal_eol = utf_16_be_eol = utf_16_le_eol = EOL_SEEN_CR;
7121       }
7122
7123     for (tail = val; CONSP (tail); tail = XCDR (tail))
7124       {
7125         enum coding_category category;
7126         int this_eol;
7127
7128         id = XINT (XCAR (tail));
7129         attrs = CODING_ID_ATTRS (id);
7130         category = XINT (CODING_ATTR_CATEGORY (attrs));
7131         eol_type = CODING_ID_EOL_TYPE (id);
7132         if (VECTORP (eol_type))
7133           {
7134             if (category == coding_category_utf_16_be
7135                 || category == coding_category_utf_16_be_nosig)
7136               this_eol = utf_16_be_eol;
7137             else if (category == coding_category_utf_16_le
7138                      || category == coding_category_utf_16_le_nosig)
7139               this_eol = utf_16_le_eol;
7140             else
7141               this_eol = normal_eol;
7142
7143             if (this_eol == EOL_SEEN_LF)
7144               XSETCAR (tail, AREF (eol_type, 0));
7145             else if (this_eol == EOL_SEEN_CRLF)
7146               XSETCAR (tail, AREF (eol_type, 1));
7147             else if (this_eol == EOL_SEEN_CR)
7148               XSETCAR (tail, AREF (eol_type, 2));
7149             else
7150               XSETCAR (tail, CODING_ID_NAME (id));
7151           }
7152         else
7153           XSETCAR (tail, CODING_ID_NAME (id));
7154       }
7155   }
7156
7157   return (highest ? XCAR (val) : val);
7158 }
7159
7160
7161 DEFUN ("detect-coding-region", Fdetect_coding_region, Sdetect_coding_region,
7162        2, 3, 0,
7163        doc: /* Detect coding system of the text in the region between START and END.
7164 Return a list of possible coding systems ordered by priority.
7165
7166 If only ASCII characters are found, it returns a list of single element
7167 `undecided' or its subsidiary coding system according to a detected
7168 end-of-line format.
7169
7170 If optional argument HIGHEST is non-nil, return the coding system of
7171 highest priority.  */)
7172      (start, end, highest)
7173      Lisp_Object start, end, highest;
7174 {
7175   int from, to;
7176   int from_byte, to_byte;
7177
7178   CHECK_NUMBER_COERCE_MARKER (start);
7179   CHECK_NUMBER_COERCE_MARKER (end);
7180
7181   validate_region (&start, &end);
7182   from = XINT (start), to = XINT (end);
7183   from_byte = CHAR_TO_BYTE (from);
7184   to_byte = CHAR_TO_BYTE (to);
7185
7186   if (from < GPT && to >= GPT)
7187     move_gap_both (to, to_byte);
7188
7189   return detect_coding_system (BYTE_POS_ADDR (from_byte),
7190                                to - from, to_byte - from_byte,
7191                                !NILP (highest),
7192                                !NILP (current_buffer
7193                                       ->enable_multibyte_characters),
7194                                Qnil);
7195 }
7196
7197 DEFUN ("detect-coding-string", Fdetect_coding_string, Sdetect_coding_string,
7198        1, 2, 0,
7199        doc: /* Detect coding system of the text in STRING.
7200 Return a list of possible coding systems ordered by priority.
7201
7202 If only ASCII characters are found, it returns a list of single element
7203 `undecided' or its subsidiary coding system according to a detected
7204 end-of-line format.
7205
7206 If optional argument HIGHEST is non-nil, return the coding system of
7207 highest priority.  */)
7208      (string, highest)
7209      Lisp_Object string, highest;
7210 {
7211   CHECK_STRING (string);
7212
7213   return detect_coding_system (SDATA (string),
7214                                SCHARS (string), SBYTES (string),
7215                                !NILP (highest), STRING_MULTIBYTE (string),
7216                                Qnil);
7217 }
7218
7219
7220 static INLINE int
7221 char_encodable_p (c, attrs)
7222      int c;
7223      Lisp_Object attrs;
7224 {
7225   Lisp_Object tail;
7226   struct charset *charset;
7227   Lisp_Object translation_table;
7228
7229   translation_table = CODING_ATTR_TRANS_TBL (attrs);
7230   if (! NILP (translation_table))
7231     c = translate_char (translation_table, c);
7232   for (tail = CODING_ATTR_CHARSET_LIST (attrs);
7233        CONSP (tail); tail = XCDR (tail))
7234     {
7235       charset = CHARSET_FROM_ID (XINT (XCAR (tail)));
7236       if (CHAR_CHARSET_P (c, charset))
7237         break;
7238     }
7239   return (! NILP (tail));
7240 }
7241
7242
7243 /* Return a list of coding systems that safely encode the text between
7244    START and END.  If EXCLUDE is non-nil, it is a list of coding
7245    systems not to check.  The returned list doesn't contain any such
7246    coding systems.  In any case, if the text contains only ASCII or is
7247    unibyte, return t.  */
7248
7249 DEFUN ("find-coding-systems-region-internal",
7250        Ffind_coding_systems_region_internal,
7251        Sfind_coding_systems_region_internal, 2, 3, 0,
7252        doc: /* Internal use only.  */)
7253      (start, end, exclude)
7254      Lisp_Object start, end, exclude;
7255 {
7256   Lisp_Object coding_attrs_list, safe_codings;
7257   EMACS_INT start_byte, end_byte;
7258   const unsigned char *p, *pbeg, *pend;
7259   int c;
7260   Lisp_Object tail, elt;
7261
7262   if (STRINGP (start))
7263     {
7264       if (!STRING_MULTIBYTE (start)
7265           || SCHARS (start) == SBYTES (start))
7266         return Qt;
7267       start_byte = 0;
7268       end_byte = SBYTES (start);
7269     }
7270   else
7271     {
7272       CHECK_NUMBER_COERCE_MARKER (start);
7273       CHECK_NUMBER_COERCE_MARKER (end);
7274       if (XINT (start) < BEG || XINT (end) > Z || XINT (start) > XINT (end))
7275         args_out_of_range (start, end);
7276       if (NILP (current_buffer->enable_multibyte_characters))
7277         return Qt;
7278       start_byte = CHAR_TO_BYTE (XINT (start));
7279       end_byte = CHAR_TO_BYTE (XINT (end));
7280       if (XINT (end) - XINT (start) == end_byte - start_byte)
7281         return Qt;
7282
7283       if (XINT (start) < GPT && XINT (end) > GPT)
7284         {
7285           if ((GPT - XINT (start)) < (XINT (end) - GPT))
7286             move_gap_both (XINT (start), start_byte);
7287           else
7288             move_gap_both (XINT (end), end_byte);
7289         }
7290     }
7291
7292   coding_attrs_list = Qnil;
7293   for (tail = Vcoding_system_list; CONSP (tail); tail = XCDR (tail))
7294     if (NILP (exclude)
7295         || NILP (Fmemq (XCAR (tail), exclude)))
7296       {
7297         Lisp_Object attrs;
7298
7299         attrs = AREF (CODING_SYSTEM_SPEC (XCAR (tail)), 0);
7300         if (EQ (XCAR (tail), CODING_ATTR_BASE_NAME (attrs))
7301             && ! EQ (CODING_ATTR_TYPE (attrs), Qundecided))
7302           {
7303             ASET (attrs, coding_attr_trans_tbl,
7304                   get_translation_table (attrs, 1));
7305             coding_attrs_list = Fcons (attrs, coding_attrs_list);
7306           }
7307       }
7308
7309   if (STRINGP (start))
7310     p = pbeg = SDATA (start);
7311   else
7312     p = pbeg = BYTE_POS_ADDR (start_byte);
7313   pend = p + (end_byte - start_byte);
7314
7315   while (p < pend && ASCII_BYTE_P (*p)) p++;
7316   while (p < pend && ASCII_BYTE_P (*(pend - 1))) pend--;
7317
7318   while (p < pend)
7319     {
7320       if (ASCII_BYTE_P (*p))
7321         p++;
7322       else
7323         {
7324           c = STRING_CHAR_ADVANCE (p);
7325
7326           charset_map_loaded = 0;
7327           for (tail = coding_attrs_list; CONSP (tail);)
7328             {
7329               elt = XCAR (tail);
7330               if (NILP (elt))
7331                 tail = XCDR (tail);
7332               else if (char_encodable_p (c, elt))
7333                 tail = XCDR (tail);
7334               else if (CONSP (XCDR (tail)))
7335                 {
7336                   XSETCAR (tail, XCAR (XCDR (tail)));
7337                   XSETCDR (tail, XCDR (XCDR (tail)));
7338                 }
7339               else
7340                 {
7341                   XSETCAR (tail, Qnil);
7342                   tail = XCDR (tail);
7343                 }
7344             }
7345           if (charset_map_loaded)
7346             {
7347               EMACS_INT p_offset = p - pbeg, pend_offset = pend - pbeg;
7348
7349               if (STRINGP (start))
7350                 pbeg = SDATA (start);
7351               else
7352                 pbeg = BYTE_POS_ADDR (start_byte);
7353               p = pbeg + p_offset;
7354               pend = pbeg + pend_offset;
7355             }
7356         }
7357     }
7358
7359   safe_codings = Qnil;
7360   for (tail = coding_attrs_list; CONSP (tail); tail = XCDR (tail))
7361     if (! NILP (XCAR (tail)))
7362       safe_codings = Fcons (CODING_ATTR_BASE_NAME (XCAR (tail)), safe_codings);
7363
7364   return safe_codings;
7365 }
7366
7367
7368 DEFUN ("unencodable-char-position", Funencodable_char_position,
7369        Sunencodable_char_position, 3, 5, 0,
7370        doc: /*
7371 Return position of first un-encodable character in a region.
7372 START and END specfiy the region and CODING-SYSTEM specifies the
7373 encoding to check.  Return nil if CODING-SYSTEM does encode the region.
7374
7375 If optional 4th argument COUNT is non-nil, it specifies at most how
7376 many un-encodable characters to search.  In this case, the value is a
7377 list of positions.
7378
7379 If optional 5th argument STRING is non-nil, it is a string to search
7380 for un-encodable characters.  In that case, START and END are indexes
7381 to the string.  */)
7382      (start, end, coding_system, count, string)
7383      Lisp_Object start, end, coding_system, count, string;
7384 {
7385   int n;
7386   struct coding_system coding;
7387   Lisp_Object attrs, charset_list, translation_table;
7388   Lisp_Object positions;
7389   int from, to;
7390   const unsigned char *p, *stop, *pend;
7391   int ascii_compatible;
7392
7393   setup_coding_system (Fcheck_coding_system (coding_system), &coding);
7394   attrs = CODING_ID_ATTRS (coding.id);
7395   if (EQ (CODING_ATTR_TYPE (attrs), Qraw_text))
7396     return Qnil;
7397   ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
7398   charset_list = CODING_ATTR_CHARSET_LIST (attrs);
7399   translation_table = get_translation_table (attrs, 1);
7400
7401   if (NILP (string))
7402     {
7403       validate_region (&start, &end);
7404       from = XINT (start);
7405       to = XINT (end);
7406       if (NILP (current_buffer->enable_multibyte_characters)
7407           || (ascii_compatible
7408               && (to - from) == (CHAR_TO_BYTE (to) - (CHAR_TO_BYTE (from)))))
7409         return Qnil;
7410       p = CHAR_POS_ADDR (from);
7411       pend = CHAR_POS_ADDR (to);
7412       if (from < GPT && to >= GPT)
7413         stop = GPT_ADDR;
7414       else
7415         stop = pend;
7416     }
7417   else
7418     {
7419       CHECK_STRING (string);
7420       CHECK_NATNUM (start);
7421       CHECK_NATNUM (end);
7422       from = XINT (start);
7423       to = XINT (end);
7424       if (from > to
7425           || to > SCHARS (string))
7426         args_out_of_range_3 (string, start, end);
7427       if (! STRING_MULTIBYTE (string))
7428         return Qnil;
7429       p = SDATA (string) + string_char_to_byte (string, from);
7430       stop = pend = SDATA (string) + string_char_to_byte (string, to);
7431       if (ascii_compatible && (to - from) == (pend - p))
7432         return Qnil;
7433     }
7434
7435   if (NILP (count))
7436     n = 1;
7437   else
7438     {
7439       CHECK_NATNUM (count);
7440       n = XINT (count);
7441     }
7442
7443   positions = Qnil;
7444   while (1)
7445     {
7446       int c;
7447
7448       if (ascii_compatible)
7449         while (p < stop && ASCII_BYTE_P (*p))
7450           p++, from++;
7451       if (p >= stop)
7452         {
7453           if (p >= pend)
7454             break;
7455           stop = pend;
7456           p = GAP_END_ADDR;
7457         }
7458
7459       c = STRING_CHAR_ADVANCE (p);
7460       if (! (ASCII_CHAR_P (c) && ascii_compatible)
7461           && ! char_charset (translate_char (translation_table, c),
7462                              charset_list, NULL))
7463         {
7464           positions = Fcons (make_number (from), positions);
7465           n--;
7466           if (n == 0)
7467             break;
7468         }
7469
7470       from++;
7471     }
7472
7473   return (NILP (count) ? Fcar (positions) : Fnreverse (positions));
7474 }
7475
7476
7477 DEFUN ("check-coding-systems-region", Fcheck_coding_systems_region,
7478        Scheck_coding_systems_region, 3, 3, 0,
7479        doc: /* Check if the region is encodable by coding systems.
7480
7481 START and END are buffer positions specifying the region.
7482 CODING-SYSTEM-LIST is a list of coding systems to check.
7483
7484 The value is an alist ((CODING-SYSTEM POS0 POS1 ...) ...), where
7485 CODING-SYSTEM is a member of CODING-SYSTEM-LIst and can't encode the
7486 whole region, POS0, POS1, ... are buffer positions where non-encodable
7487 characters are found.
7488
7489 If all coding systems in CODING-SYSTEM-LIST can encode the region, the
7490 value is nil.
7491
7492 START may be a string.  In that case, check if the string is
7493 encodable, and the value contains indices to the string instead of
7494 buffer positions.  END is ignored.  */)
7495      (start, end, coding_system_list)
7496      Lisp_Object start, end, coding_system_list;
7497 {
7498   Lisp_Object list;
7499   EMACS_INT start_byte, end_byte;
7500   int pos;
7501   const unsigned char *p, *pbeg, *pend;
7502   int c;
7503   Lisp_Object tail, elt, attrs;
7504
7505   if (STRINGP (start))
7506     {
7507       if (!STRING_MULTIBYTE (start)
7508           && SCHARS (start) != SBYTES (start))
7509         return Qnil;
7510       start_byte = 0;
7511       end_byte = SBYTES (start);
7512       pos = 0;
7513     }
7514   else
7515     {
7516       CHECK_NUMBER_COERCE_MARKER (start);
7517       CHECK_NUMBER_COERCE_MARKER (end);
7518       if (XINT (start) < BEG || XINT (end) > Z || XINT (start) > XINT (end))
7519         args_out_of_range (start, end);
7520       if (NILP (current_buffer->enable_multibyte_characters))
7521         return Qnil;
7522       start_byte = CHAR_TO_BYTE (XINT (start));
7523       end_byte = CHAR_TO_BYTE (XINT (end));
7524       if (XINT (end) - XINT (start) == end_byte - start_byte)
7525         return Qt;
7526
7527       if (XINT (start) < GPT && XINT (end) > GPT)
7528         {
7529           if ((GPT - XINT (start)) < (XINT (end) - GPT))
7530             move_gap_both (XINT (start), start_byte);
7531           else
7532             move_gap_both (XINT (end), end_byte);
7533         }
7534       pos = XINT (start);
7535     }
7536
7537   list = Qnil;
7538   for (tail = coding_system_list; CONSP (tail); tail = XCDR (tail))
7539     {
7540       elt = XCAR (tail);
7541       attrs = AREF (CODING_SYSTEM_SPEC (elt), 0);
7542       ASET (attrs, coding_attr_trans_tbl, get_translation_table (attrs, 1));
7543       list = Fcons (Fcons (elt, Fcons (attrs, Qnil)), list);
7544     }
7545
7546   if (STRINGP (start))
7547     p = pbeg = SDATA (start);
7548   else
7549     p = pbeg = BYTE_POS_ADDR (start_byte);
7550   pend = p + (end_byte - start_byte);
7551
7552   while (p < pend && ASCII_BYTE_P (*p)) p++, pos++;
7553   while (p < pend && ASCII_BYTE_P (*(pend - 1))) pend--;
7554
7555   while (p < pend)
7556     {
7557       if (ASCII_BYTE_P (*p))
7558         p++;
7559       else
7560         {
7561           c = STRING_CHAR_ADVANCE (p);
7562
7563           charset_map_loaded = 0;
7564           for (tail = list; CONSP (tail); tail = XCDR (tail))
7565             {
7566               elt = XCDR (XCAR (tail));
7567               if (! char_encodable_p (c, XCAR (elt)))
7568                 XSETCDR (elt, Fcons (make_number (pos), XCDR (elt)));
7569             }
7570           if (charset_map_loaded)
7571             {
7572               EMACS_INT p_offset = p - pbeg, pend_offset = pend - pbeg;
7573
7574               if (STRINGP (start))
7575                 pbeg = SDATA (start);
7576               else
7577                 pbeg = BYTE_POS_ADDR (start_byte);
7578               p = pbeg + p_offset;
7579               pend = pbeg + pend_offset;
7580             }
7581         }
7582       pos++;
7583     }
7584
7585   tail = list;
7586   list = Qnil;
7587   for (; CONSP (tail); tail = XCDR (tail))
7588     {
7589       elt = XCAR (tail);
7590       if (CONSP (XCDR (XCDR (elt))))
7591         list = Fcons (Fcons (XCAR (elt), Fnreverse (XCDR (XCDR (elt)))),
7592                       list);
7593     }
7594
7595   return list;
7596 }
7597
7598
7599 Lisp_Object
7600 code_convert_region (start, end, coding_system, dst_object, encodep, norecord)
7601      Lisp_Object start, end, coding_system, dst_object;
7602      int encodep, norecord;
7603 {
7604   struct coding_system coding;
7605   EMACS_INT from, from_byte, to, to_byte;
7606   Lisp_Object src_object;
7607
7608   CHECK_NUMBER_COERCE_MARKER (start);
7609   CHECK_NUMBER_COERCE_MARKER (end);
7610   if (NILP (coding_system))
7611     coding_system = Qno_conversion;
7612   else
7613     CHECK_CODING_SYSTEM (coding_system);
7614   src_object = Fcurrent_buffer ();
7615   if (NILP (dst_object))
7616     dst_object = src_object;
7617   else if (! EQ (dst_object, Qt))
7618     CHECK_BUFFER (dst_object);
7619
7620   validate_region (&start, &end);
7621   from = XFASTINT (start);
7622   from_byte = CHAR_TO_BYTE (from);
7623   to = XFASTINT (end);
7624   to_byte = CHAR_TO_BYTE (to);
7625
7626   setup_coding_system (coding_system, &coding);
7627   coding.mode |= CODING_MODE_LAST_BLOCK;
7628
7629   if (encodep)
7630     encode_coding_object (&coding, src_object, from, from_byte, to, to_byte,
7631                           dst_object);
7632   else
7633     decode_coding_object (&coding, src_object, from, from_byte, to, to_byte,
7634                           dst_object);
7635   if (! norecord)
7636     Vlast_coding_system_used = CODING_ID_NAME (coding.id);
7637
7638   return (BUFFERP (dst_object)
7639           ? make_number (coding.produced_char)
7640           : coding.dst_object);
7641 }
7642
7643
7644 DEFUN ("decode-coding-region", Fdecode_coding_region, Sdecode_coding_region,
7645        3, 4, "r\nzCoding system: ",
7646        doc: /* Decode the current region from the specified coding system.
7647 When called from a program, takes four arguments:
7648         START, END, CODING-SYSTEM, and DESTINATION.
7649 START and END are buffer positions.
7650
7651 Optional 4th arguments DESTINATION specifies where the decoded text goes.
7652 If nil, the region between START and END is replace by the decoded text.
7653 If buffer, the decoded text is inserted in the buffer.
7654 If t, the decoded text is returned.
7655
7656 This function sets `last-coding-system-used' to the precise coding system
7657 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
7658 not fully specified.)
7659 It returns the length of the decoded text.  */)
7660      (start, end, coding_system, destination)
7661      Lisp_Object start, end, coding_system, destination;
7662 {
7663   return code_convert_region (start, end, coding_system, destination, 0, 0);
7664 }
7665
7666 DEFUN ("encode-coding-region", Fencode_coding_region, Sencode_coding_region,
7667        3, 4, "r\nzCoding system: ",
7668        doc: /* Encode the current region by specified coding system.
7669 When called from a program, takes three arguments:
7670 START, END, and CODING-SYSTEM.  START and END are buffer positions.
7671
7672 Optional 4th arguments DESTINATION specifies where the encoded text goes.
7673 If nil, the region between START and END is replace by the encoded text.
7674 If buffer, the encoded text is inserted in the buffer.
7675 If t, the encoded text is returned.
7676
7677 This function sets `last-coding-system-used' to the precise coding system
7678 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
7679 not fully specified.)
7680 It returns the length of the encoded text.  */)
7681   (start, end, coding_system, destination)
7682      Lisp_Object start, end, coding_system, destination;
7683 {
7684   return code_convert_region (start, end, coding_system, destination, 1, 0);
7685 }
7686
7687 Lisp_Object
7688 code_convert_string (string, coding_system, dst_object,
7689                      encodep, nocopy, norecord)
7690      Lisp_Object string, coding_system, dst_object;
7691      int encodep, nocopy, norecord;
7692 {
7693   struct coding_system coding;
7694   EMACS_INT chars, bytes;
7695
7696   CHECK_STRING (string);
7697   if (NILP (coding_system))
7698     {
7699       if (! norecord)
7700         Vlast_coding_system_used = Qno_conversion;
7701       if (NILP (dst_object))
7702         return (nocopy ? Fcopy_sequence (string) : string);
7703     }
7704
7705   if (NILP (coding_system))
7706     coding_system = Qno_conversion;
7707   else
7708     CHECK_CODING_SYSTEM (coding_system);
7709   if (NILP (dst_object))
7710     dst_object = Qt;
7711   else if (! EQ (dst_object, Qt))
7712     CHECK_BUFFER (dst_object);
7713
7714   setup_coding_system (coding_system, &coding);
7715   coding.mode |= CODING_MODE_LAST_BLOCK;
7716   chars = SCHARS (string);
7717   bytes = SBYTES (string);
7718   if (encodep)
7719     encode_coding_object (&coding, string, 0, 0, chars, bytes, dst_object);
7720   else
7721     decode_coding_object (&coding, string, 0, 0, chars, bytes, dst_object);
7722   if (! norecord)
7723     Vlast_coding_system_used = CODING_ID_NAME (coding.id);
7724
7725   return (BUFFERP (dst_object)
7726           ? make_number (coding.produced_char)
7727           : coding.dst_object);
7728 }
7729
7730
7731 /* Encode or decode STRING according to CODING_SYSTEM.
7732    Do not set Vlast_coding_system_used.
7733
7734    This function is called only from macros DECODE_FILE and
7735    ENCODE_FILE, thus we ignore character composition.  */
7736
7737 Lisp_Object
7738 code_convert_string_norecord (string, coding_system, encodep)
7739      Lisp_Object string, coding_system;
7740      int encodep;
7741 {
7742   return code_convert_string (string, coding_system, Qt, encodep, 0, 1);
7743 }
7744
7745
7746 DEFUN ("decode-coding-string", Fdecode_coding_string, Sdecode_coding_string,
7747        2, 4, 0,
7748        doc: /* Decode STRING which is encoded in CODING-SYSTEM, and return the result.
7749
7750 Optional third arg NOCOPY non-nil means it is OK to return STRING itself
7751 if the decoding operation is trivial.
7752
7753 Optional fourth arg BUFFER non-nil meant that the decoded text is
7754 inserted in BUFFER instead of returned as a string.  In this case,
7755 the return value is BUFFER.
7756
7757 This function sets `last-coding-system-used' to the precise coding system
7758 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
7759 not fully specified.  */)
7760   (string, coding_system, nocopy, buffer)
7761      Lisp_Object string, coding_system, nocopy, buffer;
7762 {
7763   return code_convert_string (string, coding_system, buffer,
7764                               0, ! NILP (nocopy), 0);
7765 }
7766
7767 DEFUN ("encode-coding-string", Fencode_coding_string, Sencode_coding_string,
7768        2, 4, 0,
7769        doc: /* Encode STRING to CODING-SYSTEM, and return the result.
7770
7771 Optional third arg NOCOPY non-nil means it is OK to return STRING
7772 itself if the encoding operation is trivial.
7773
7774 Optional fourth arg BUFFER non-nil meant that the encoded text is
7775 inserted in BUFFER instead of returned as a string.  In this case,
7776 the return value is BUFFER.
7777
7778 This function sets `last-coding-system-used' to the precise coding system
7779 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
7780 not fully specified.)  */)
7781      (string, coding_system, nocopy, buffer)
7782      Lisp_Object string, coding_system, nocopy, buffer;
7783 {
7784   return code_convert_string (string, coding_system, buffer,
7785                               1, ! NILP (nocopy), 1);
7786 }
7787
7788 \f
7789 DEFUN ("decode-sjis-char", Fdecode_sjis_char, Sdecode_sjis_char, 1, 1, 0,
7790        doc: /* Decode a Japanese character which has CODE in shift_jis encoding.
7791 Return the corresponding character.  */)
7792      (code)
7793      Lisp_Object code;
7794 {
7795   Lisp_Object spec, attrs, val;
7796   struct charset *charset_roman, *charset_kanji, *charset_kana, *charset;
7797   int c;
7798
7799   CHECK_NATNUM (code);
7800   c = XFASTINT (code);
7801   CHECK_CODING_SYSTEM_GET_SPEC (Vsjis_coding_system, spec);
7802   attrs = AREF (spec, 0);
7803
7804   if (ASCII_BYTE_P (c)
7805       && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
7806     return code;
7807
7808   val = CODING_ATTR_CHARSET_LIST (attrs);
7809   charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
7810   charset_kana = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
7811   charset_kanji = CHARSET_FROM_ID (XINT (XCAR (val)));
7812
7813   if (c <= 0x7F)
7814     charset = charset_roman;
7815   else if (c >= 0xA0 && c < 0xDF)
7816     {
7817       charset = charset_kana;
7818       c -= 0x80;
7819     }
7820   else
7821     {
7822       int s1 = c >> 8, s2 = c & 0xFF;
7823
7824       if (s1 < 0x81 || (s1 > 0x9F && s1 < 0xE0) || s1 > 0xEF
7825           || s2 < 0x40 || s2 == 0x7F || s2 > 0xFC)
7826         error ("Invalid code: %d", code);
7827       SJIS_TO_JIS (c);
7828       charset = charset_kanji;
7829     }
7830   c = DECODE_CHAR (charset, c);
7831   if (c < 0)
7832     error ("Invalid code: %d", code);
7833   return make_number (c);
7834 }
7835
7836
7837 DEFUN ("encode-sjis-char", Fencode_sjis_char, Sencode_sjis_char, 1, 1, 0,
7838        doc: /* Encode a Japanese character CHAR to shift_jis encoding.
7839 Return the corresponding code in SJIS.  */)
7840      (ch)
7841     Lisp_Object ch;
7842 {
7843   Lisp_Object spec, attrs, charset_list;
7844   int c;
7845   struct charset *charset;
7846   unsigned code;
7847
7848   CHECK_CHARACTER (ch);
7849   c = XFASTINT (ch);
7850   CHECK_CODING_SYSTEM_GET_SPEC (Vsjis_coding_system, spec);
7851   attrs = AREF (spec, 0);
7852
7853   if (ASCII_CHAR_P (c)
7854       && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
7855     return ch;
7856
7857   charset_list = CODING_ATTR_CHARSET_LIST (attrs);
7858   charset = char_charset (c, charset_list, &code);
7859   if (code == CHARSET_INVALID_CODE (charset))
7860     error ("Can't encode by shift_jis encoding: %d", c);
7861   JIS_TO_SJIS (code);
7862
7863   return make_number (code);
7864 }
7865
7866 DEFUN ("decode-big5-char", Fdecode_big5_char, Sdecode_big5_char, 1, 1, 0,
7867        doc: /* Decode a Big5 character which has CODE in BIG5 coding system.
7868 Return the corresponding character.  */)
7869      (code)
7870      Lisp_Object code;
7871 {
7872   Lisp_Object spec, attrs, val;
7873   struct charset *charset_roman, *charset_big5, *charset;
7874   int c;
7875
7876   CHECK_NATNUM (code);
7877   c = XFASTINT (code);
7878   CHECK_CODING_SYSTEM_GET_SPEC (Vbig5_coding_system, spec);
7879   attrs = AREF (spec, 0);
7880
7881   if (ASCII_BYTE_P (c)
7882       && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
7883     return code;
7884
7885   val = CODING_ATTR_CHARSET_LIST (attrs);
7886   charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
7887   charset_big5 = CHARSET_FROM_ID (XINT (XCAR (val)));
7888
7889   if (c <= 0x7F)
7890     charset = charset_roman;
7891   else
7892     {
7893       int b1 = c >> 8, b2 = c & 0x7F;
7894       if (b1 < 0xA1 || b1 > 0xFE
7895           || b2 < 0x40 || (b2 > 0x7E && b2 < 0xA1) || b2 > 0xFE)
7896         error ("Invalid code: %d", code);
7897       charset = charset_big5;
7898     }
7899   c = DECODE_CHAR (charset, (unsigned )c);
7900   if (c < 0)
7901     error ("Invalid code: %d", code);
7902   return make_number (c);
7903 }
7904
7905 DEFUN ("encode-big5-char", Fencode_big5_char, Sencode_big5_char, 1, 1, 0,
7906        doc: /* Encode the Big5 character CHAR to BIG5 coding system.
7907 Return the corresponding character code in Big5.  */)
7908      (ch)
7909      Lisp_Object ch;
7910 {
7911   Lisp_Object spec, attrs, charset_list;
7912   struct charset *charset;
7913   int c;
7914   unsigned code;
7915
7916   CHECK_CHARACTER (ch);
7917   c = XFASTINT (ch);
7918   CHECK_CODING_SYSTEM_GET_SPEC (Vbig5_coding_system, spec);
7919   attrs = AREF (spec, 0);
7920   if (ASCII_CHAR_P (c)
7921       && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
7922     return ch;
7923
7924   charset_list = CODING_ATTR_CHARSET_LIST (attrs);
7925   charset = char_charset (c, charset_list, &code);
7926   if (code == CHARSET_INVALID_CODE (charset))
7927     error ("Can't encode by Big5 encoding: %d", c);
7928
7929   return make_number (code);
7930 }
7931
7932 \f
7933 DEFUN ("set-terminal-coding-system-internal",
7934        Fset_terminal_coding_system_internal,
7935        Sset_terminal_coding_system_internal, 1, 1, 0,
7936        doc: /* Internal use only.  */)
7937      (coding_system)
7938      Lisp_Object coding_system;
7939 {
7940   CHECK_SYMBOL (coding_system);
7941   setup_coding_system (Fcheck_coding_system (coding_system),
7942                         &terminal_coding);
7943
7944   /* We had better not send unsafe characters to terminal.  */
7945   terminal_coding.mode |= CODING_MODE_SAFE_ENCODING;
7946   /* Characer composition should be disabled.  */
7947   terminal_coding.common_flags &= ~CODING_ANNOTATE_COMPOSITION_MASK;
7948   terminal_coding.src_multibyte = 1;
7949   terminal_coding.dst_multibyte = 0;
7950   return Qnil;
7951 }
7952
7953 DEFUN ("set-safe-terminal-coding-system-internal",
7954        Fset_safe_terminal_coding_system_internal,
7955        Sset_safe_terminal_coding_system_internal, 1, 1, 0,
7956        doc: /* Internal use only.  */)
7957      (coding_system)
7958      Lisp_Object coding_system;
7959 {
7960   CHECK_SYMBOL (coding_system);
7961   setup_coding_system (Fcheck_coding_system (coding_system),
7962                        &safe_terminal_coding);
7963   /* Characer composition should be disabled.  */
7964   safe_terminal_coding.common_flags &= ~CODING_ANNOTATE_COMPOSITION_MASK;
7965   safe_terminal_coding.src_multibyte = 1;
7966   safe_terminal_coding.dst_multibyte = 0;
7967   return Qnil;
7968 }
7969
7970 DEFUN ("terminal-coding-system",
7971        Fterminal_coding_system, Sterminal_coding_system, 0, 0, 0,
7972        doc: /* Return coding system specified for terminal output.  */)
7973      ()
7974 {
7975   return CODING_ID_NAME (terminal_coding.id);
7976 }
7977
7978 DEFUN ("set-keyboard-coding-system-internal",
7979        Fset_keyboard_coding_system_internal,
7980        Sset_keyboard_coding_system_internal, 1, 1, 0,
7981        doc: /* Internal use only.  */)
7982      (coding_system)
7983      Lisp_Object coding_system;
7984 {
7985   CHECK_SYMBOL (coding_system);
7986   setup_coding_system (Fcheck_coding_system (coding_system),
7987                        &keyboard_coding);
7988   /* Characer composition should be disabled.  */
7989   keyboard_coding.common_flags &= ~CODING_ANNOTATE_COMPOSITION_MASK;
7990   return Qnil;
7991 }
7992
7993 DEFUN ("keyboard-coding-system",
7994        Fkeyboard_coding_system, Skeyboard_coding_system, 0, 0, 0,
7995        doc: /* Return coding system specified for decoding keyboard input.  */)
7996      ()
7997 {
7998   return CODING_ID_NAME (keyboard_coding.id);
7999 }
8000
8001 \f
8002 DEFUN ("find-operation-coding-system", Ffind_operation_coding_system,
8003        Sfind_operation_coding_system,  1, MANY, 0,
8004        doc: /* Choose a coding system for an operation based on the target name.
8005 The value names a pair of coding systems: (DECODING-SYSTEM . ENCODING-SYSTEM).
8006 DECODING-SYSTEM is the coding system to use for decoding
8007 \(in case OPERATION does decoding), and ENCODING-SYSTEM is the coding system
8008 for encoding (in case OPERATION does encoding).
8009
8010 The first argument OPERATION specifies an I/O primitive:
8011   For file I/O, `insert-file-contents' or `write-region'.
8012   For process I/O, `call-process', `call-process-region', or `start-process'.
8013   For network I/O, `open-network-stream'.
8014
8015 The remaining arguments should be the same arguments that were passed
8016 to the primitive.  Depending on which primitive, one of those arguments
8017 is selected as the TARGET.  For example, if OPERATION does file I/O,
8018 whichever argument specifies the file name is TARGET.
8019
8020 TARGET has a meaning which depends on OPERATION:
8021   For file I/O, TARGET is a file name.
8022   For process I/O, TARGET is a process name.
8023   For network I/O, TARGET is a service name or a port number
8024
8025 This function looks up what specified for TARGET in,
8026 `file-coding-system-alist', `process-coding-system-alist',
8027 or `network-coding-system-alist' depending on OPERATION.
8028 They may specify a coding system, a cons of coding systems,
8029 or a function symbol to call.
8030 In the last case, we call the function with one argument,
8031 which is a list of all the arguments given to this function.
8032
8033 usage: (find-operation-coding-system OPERATION ARGUMENTS ...)  */)
8034      (nargs, args)
8035      int nargs;
8036      Lisp_Object *args;
8037 {
8038   Lisp_Object operation, target_idx, target, val;
8039   register Lisp_Object chain;
8040
8041   if (nargs < 2)
8042     error ("Too few arguments");
8043   operation = args[0];
8044   if (!SYMBOLP (operation)
8045       || !INTEGERP (target_idx = Fget (operation, Qtarget_idx)))
8046     error ("Invalid first arguement");
8047   if (nargs < 1 + XINT (target_idx))
8048     error ("Too few arguments for operation: %s",
8049            SDATA (SYMBOL_NAME (operation)));
8050   target = args[XINT (target_idx) + 1];
8051   if (!(STRINGP (target)
8052         || (EQ (operation, Qopen_network_stream) && INTEGERP (target))))
8053     error ("Invalid %dth argument", XINT (target_idx) + 1);
8054
8055   chain = ((EQ (operation, Qinsert_file_contents)
8056             || EQ (operation, Qwrite_region))
8057            ? Vfile_coding_system_alist
8058            : (EQ (operation, Qopen_network_stream)
8059               ? Vnetwork_coding_system_alist
8060               : Vprocess_coding_system_alist));
8061   if (NILP (chain))
8062     return Qnil;
8063
8064   for (; CONSP (chain); chain = XCDR (chain))
8065     {
8066       Lisp_Object elt;
8067
8068       elt = XCAR (chain);
8069       if (CONSP (elt)
8070           && ((STRINGP (target)
8071                && STRINGP (XCAR (elt))
8072                && fast_string_match (XCAR (elt), target) >= 0)
8073               || (INTEGERP (target) && EQ (target, XCAR (elt)))))
8074         {
8075           val = XCDR (elt);
8076           /* Here, if VAL is both a valid coding system and a valid
8077              function symbol, we return VAL as a coding system.  */
8078           if (CONSP (val))
8079             return val;
8080           if (! SYMBOLP (val))
8081             return Qnil;
8082           if (! NILP (Fcoding_system_p (val)))
8083             return Fcons (val, val);
8084           if (! NILP (Ffboundp (val)))
8085             {
8086               val = call1 (val, Flist (nargs, args));
8087               if (CONSP (val))
8088                 return val;
8089               if (SYMBOLP (val) && ! NILP (Fcoding_system_p (val)))
8090                 return Fcons (val, val);
8091             }
8092           return Qnil;
8093         }
8094     }
8095   return Qnil;
8096 }
8097
8098 DEFUN ("set-coding-system-priority", Fset_coding_system_priority,
8099        Sset_coding_system_priority, 0, MANY, 0,
8100        doc: /* Assign higher priority to the coding systems given as arguments.
8101 If multiple coding systems belongs to the same category,
8102 all but the first one are ignored.
8103
8104 usage: (set-coding-system-priority ...)  */)
8105      (nargs, args)
8106      int nargs;
8107      Lisp_Object *args;
8108 {
8109   int i, j;
8110   int changed[coding_category_max];
8111   enum coding_category priorities[coding_category_max];
8112
8113   bzero (changed, sizeof changed);
8114
8115   for (i = j = 0; i < nargs; i++)
8116     {
8117       enum coding_category category;
8118       Lisp_Object spec, attrs;
8119
8120       CHECK_CODING_SYSTEM_GET_SPEC (args[i], spec);
8121       attrs = AREF (spec, 0);
8122       category = XINT (CODING_ATTR_CATEGORY (attrs));
8123       if (changed[category])
8124         /* Ignore this coding system because a coding system of the
8125            same category already had a higher priority.  */
8126         continue;
8127       changed[category] = 1;
8128       priorities[j++] = category;
8129       if (coding_categories[category].id >= 0
8130           && ! EQ (args[i], CODING_ID_NAME (coding_categories[category].id)))
8131         setup_coding_system (args[i], &coding_categories[category]);
8132       Fset (AREF (Vcoding_category_table, category), args[i]);
8133     }
8134
8135   /* Now we have decided top J priorities.  Reflect the order of the
8136      original priorities to the remaining priorities.  */
8137
8138   for (i = j, j = 0; i < coding_category_max; i++, j++)
8139     {
8140       while (j < coding_category_max
8141              && changed[coding_priorities[j]])
8142         j++;
8143       if (j == coding_category_max)
8144         abort ();
8145       priorities[i] = coding_priorities[j];
8146     }
8147
8148   bcopy (priorities, coding_priorities, sizeof priorities);
8149
8150   /* Update `coding-category-list'.  */
8151   Vcoding_category_list = Qnil;
8152   for (i = coding_category_max - 1; i >= 0; i--)
8153     Vcoding_category_list
8154       = Fcons (AREF (Vcoding_category_table, priorities[i]),
8155                Vcoding_category_list);
8156
8157   return Qnil;
8158 }
8159
8160 DEFUN ("coding-system-priority-list", Fcoding_system_priority_list,
8161        Scoding_system_priority_list, 0, 1, 0,
8162        doc: /* Return a list of coding systems ordered by their priorities.
8163 HIGHESTP non-nil means just return the highest priority one.  */)
8164      (highestp)
8165      Lisp_Object highestp;
8166 {
8167   int i;
8168   Lisp_Object val;
8169
8170   for (i = 0, val = Qnil; i < coding_category_max; i++)
8171     {
8172       enum coding_category category = coding_priorities[i];
8173       int id = coding_categories[category].id;
8174       Lisp_Object attrs;
8175
8176       if (id < 0)
8177         continue;
8178       attrs = CODING_ID_ATTRS (id);
8179       if (! NILP (highestp))
8180         return CODING_ATTR_BASE_NAME (attrs);
8181       val = Fcons (CODING_ATTR_BASE_NAME (attrs), val);
8182     }
8183   return Fnreverse (val);
8184 }
8185
8186 static char *suffixes[] = { "-unix", "-dos", "-mac" };
8187
8188 static Lisp_Object
8189 make_subsidiaries (base)
8190      Lisp_Object base;
8191 {
8192   Lisp_Object subsidiaries;
8193   int base_name_len = SBYTES (SYMBOL_NAME (base));
8194   char *buf = (char *) alloca (base_name_len + 6);
8195   int i;
8196
8197   bcopy (SDATA (SYMBOL_NAME (base)), buf, base_name_len);
8198   subsidiaries = Fmake_vector (make_number (3), Qnil);
8199   for (i = 0; i < 3; i++)
8200     {
8201       bcopy (suffixes[i], buf + base_name_len, strlen (suffixes[i]) + 1);
8202       ASET (subsidiaries, i, intern (buf));
8203     }
8204   return subsidiaries;
8205 }
8206
8207
8208 DEFUN ("define-coding-system-internal", Fdefine_coding_system_internal,
8209        Sdefine_coding_system_internal, coding_arg_max, MANY, 0,
8210        doc: /* For internal use only.
8211 usage: (define-coding-system-internal ...)  */)
8212      (nargs, args)
8213      int nargs;
8214      Lisp_Object *args;
8215 {
8216   Lisp_Object name;
8217   Lisp_Object spec_vec;         /* [ ATTRS ALIASE EOL_TYPE ] */
8218   Lisp_Object attrs;            /* Vector of attributes.  */
8219   Lisp_Object eol_type;
8220   Lisp_Object aliases;
8221   Lisp_Object coding_type, charset_list, safe_charsets;
8222   enum coding_category category;
8223   Lisp_Object tail, val;
8224   int max_charset_id = 0;
8225   int i;
8226
8227   if (nargs < coding_arg_max)
8228     goto short_args;
8229
8230   attrs = Fmake_vector (make_number (coding_attr_last_index), Qnil);
8231
8232   name = args[coding_arg_name];
8233   CHECK_SYMBOL (name);
8234   CODING_ATTR_BASE_NAME (attrs) = name;
8235
8236   val = args[coding_arg_mnemonic];
8237   if (! STRINGP (val))
8238     CHECK_CHARACTER (val);
8239   CODING_ATTR_MNEMONIC (attrs) = val;
8240
8241   coding_type = args[coding_arg_coding_type];
8242   CHECK_SYMBOL (coding_type);
8243   CODING_ATTR_TYPE (attrs) = coding_type;
8244
8245   charset_list = args[coding_arg_charset_list];
8246   if (SYMBOLP (charset_list))
8247     {
8248       if (EQ (charset_list, Qiso_2022))
8249         {
8250           if (! EQ (coding_type, Qiso_2022))
8251             error ("Invalid charset-list");
8252           charset_list = Viso_2022_charset_list;
8253         }
8254       else if (EQ (charset_list, Qemacs_mule))
8255         {
8256           if (! EQ (coding_type, Qemacs_mule))
8257             error ("Invalid charset-list");
8258           charset_list = Vemacs_mule_charset_list;
8259         }
8260       for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
8261         if (max_charset_id < XFASTINT (XCAR (tail)))
8262           max_charset_id = XFASTINT (XCAR (tail));
8263     }
8264   else
8265     {
8266       charset_list = Fcopy_sequence (charset_list);
8267       for (tail = charset_list; !NILP (tail); tail = Fcdr (tail))
8268         {
8269           struct charset *charset;
8270
8271           val = Fcar (tail);
8272           CHECK_CHARSET_GET_CHARSET (val, charset);
8273           if (EQ (coding_type, Qiso_2022)
8274               ? CHARSET_ISO_FINAL (charset) < 0
8275               : EQ (coding_type, Qemacs_mule)
8276               ? CHARSET_EMACS_MULE_ID (charset) < 0
8277               : 0)
8278             error ("Can't handle charset `%s'",
8279                    SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
8280
8281           XSETCAR (tail, make_number (charset->id));
8282           if (max_charset_id < charset->id)
8283             max_charset_id = charset->id;
8284         }
8285     }
8286   CODING_ATTR_CHARSET_LIST (attrs) = charset_list;
8287
8288   safe_charsets = Fmake_string (make_number (max_charset_id + 1),
8289                                 make_number (255));
8290   for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
8291     SSET (safe_charsets, XFASTINT (XCAR (tail)), 0);
8292   CODING_ATTR_SAFE_CHARSETS (attrs) = safe_charsets;
8293
8294   CODING_ATTR_ASCII_COMPAT (attrs) = args[coding_arg_ascii_compatible_p];
8295
8296   val = args[coding_arg_decode_translation_table];
8297   if (! CHAR_TABLE_P (val) && ! CONSP (val))
8298     CHECK_SYMBOL (val);
8299   CODING_ATTR_DECODE_TBL (attrs) = val;
8300
8301   val = args[coding_arg_encode_translation_table];
8302   if (! CHAR_TABLE_P (val) && ! CONSP (val))
8303     CHECK_SYMBOL (val);
8304   CODING_ATTR_ENCODE_TBL (attrs) = val;
8305
8306   val = args[coding_arg_post_read_conversion];
8307   CHECK_SYMBOL (val);
8308   CODING_ATTR_POST_READ (attrs) = val;
8309
8310   val = args[coding_arg_pre_write_conversion];
8311   CHECK_SYMBOL (val);
8312   CODING_ATTR_PRE_WRITE (attrs) = val;
8313
8314   val = args[coding_arg_default_char];
8315   if (NILP (val))
8316     CODING_ATTR_DEFAULT_CHAR (attrs) = make_number (' ');
8317   else
8318     {
8319       CHECK_CHARACTER (val);
8320       CODING_ATTR_DEFAULT_CHAR (attrs) = val;
8321     }
8322
8323   val = args[coding_arg_for_unibyte];
8324   CODING_ATTR_FOR_UNIBYTE (attrs) = NILP (val) ? Qnil : Qt;
8325
8326   val = args[coding_arg_plist];
8327   CHECK_LIST (val);
8328   CODING_ATTR_PLIST (attrs) = val;
8329
8330   if (EQ (coding_type, Qcharset))
8331     {
8332       /* Generate a lisp vector of 256 elements.  Each element is nil,
8333          integer, or a list of charset IDs.
8334
8335          If Nth element is nil, the byte code N is invalid in this
8336          coding system.
8337
8338          If Nth element is a number NUM, N is the first byte of a
8339          charset whose ID is NUM.
8340
8341          If Nth element is a list of charset IDs, N is the first byte
8342          of one of them.  The list is sorted by dimensions of the
8343          charsets.  A charset of smaller dimension comes firtst. */
8344       val = Fmake_vector (make_number (256), Qnil);
8345
8346       for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
8347         {
8348           struct charset *charset = CHARSET_FROM_ID (XFASTINT (XCAR (tail)));
8349           int dim = CHARSET_DIMENSION (charset);
8350           int idx = (dim - 1) * 4;
8351
8352           if (CHARSET_ASCII_COMPATIBLE_P (charset))
8353             CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
8354
8355           for (i = charset->code_space[idx];
8356                i <= charset->code_space[idx + 1]; i++)
8357             {
8358               Lisp_Object tmp, tmp2;
8359               int dim2;
8360
8361               tmp = AREF (val, i);
8362               if (NILP (tmp))
8363                 tmp = XCAR (tail);
8364               else if (NUMBERP (tmp))
8365                 {
8366                   dim2 = CHARSET_DIMENSION (CHARSET_FROM_ID (XFASTINT (tmp)));
8367                   if (dim < dim2)
8368                     tmp = Fcons (XCAR (tail), Fcons (tmp, Qnil));
8369                   else
8370                     tmp = Fcons (tmp, Fcons (XCAR (tail), Qnil));
8371                 }
8372               else
8373                 {
8374                   for (tmp2 = tmp; CONSP (tmp2); tmp2 = XCDR (tmp2))
8375                     {
8376                       dim2 = CHARSET_DIMENSION (CHARSET_FROM_ID (XFASTINT (XCAR (tmp2))));
8377                       if (dim < dim2)
8378                         break;
8379                     }
8380                   if (NILP (tmp2))
8381                     tmp = nconc2 (tmp, Fcons (XCAR (tail), Qnil));
8382                   else
8383                     {
8384                       XSETCDR (tmp2, Fcons (XCAR (tmp2), XCDR (tmp2)));
8385                       XSETCAR (tmp2, XCAR (tail));
8386                     }
8387                 }
8388               ASET (val, i, tmp);
8389             }
8390         }
8391       ASET (attrs, coding_attr_charset_valids, val);
8392       category = coding_category_charset;
8393     }
8394   else if (EQ (coding_type, Qccl))
8395     {
8396       Lisp_Object valids;
8397
8398       if (nargs < coding_arg_ccl_max)
8399         goto short_args;
8400
8401       val = args[coding_arg_ccl_decoder];
8402       CHECK_CCL_PROGRAM (val);
8403       if (VECTORP (val))
8404         val = Fcopy_sequence (val);
8405       ASET (attrs, coding_attr_ccl_decoder, val);
8406
8407       val = args[coding_arg_ccl_encoder];
8408       CHECK_CCL_PROGRAM (val);
8409       if (VECTORP (val))
8410         val = Fcopy_sequence (val);
8411       ASET (attrs, coding_attr_ccl_encoder, val);
8412
8413       val = args[coding_arg_ccl_valids];
8414       valids = Fmake_string (make_number (256), make_number (0));
8415       for (tail = val; !NILP (tail); tail = Fcdr (tail))
8416         {
8417           int from, to;
8418
8419           val = Fcar (tail);
8420           if (INTEGERP (val))
8421             {
8422               from = to = XINT (val);
8423               if (from < 0 || from > 255)
8424                 args_out_of_range_3 (val, make_number (0), make_number (255));
8425             }
8426           else
8427             {
8428               CHECK_CONS (val);
8429               CHECK_NATNUM_CAR (val);
8430               CHECK_NATNUM_CDR (val);
8431               from = XINT (XCAR (val));
8432               if (from > 255)
8433                 args_out_of_range_3 (XCAR (val),
8434                                      make_number (0), make_number (255));
8435               to = XINT (XCDR (val));
8436               if (to < from || to > 255)
8437                 args_out_of_range_3 (XCDR (val),
8438                                      XCAR (val), make_number (255));
8439             }
8440           for (i = from; i <= to; i++)
8441             SSET (valids, i, 1);
8442         }
8443       ASET (attrs, coding_attr_ccl_valids, valids);
8444
8445       category = coding_category_ccl;
8446     }
8447   else if (EQ (coding_type, Qutf_16))
8448     {
8449       Lisp_Object bom, endian;
8450
8451       CODING_ATTR_ASCII_COMPAT (attrs) = Qnil;
8452
8453       if (nargs < coding_arg_utf16_max)
8454         goto short_args;
8455
8456       bom = args[coding_arg_utf16_bom];
8457       if (! NILP (bom) && ! EQ (bom, Qt))
8458         {
8459           CHECK_CONS (bom);
8460           val = XCAR (bom);
8461           CHECK_CODING_SYSTEM (val);
8462           val = XCDR (bom);
8463           CHECK_CODING_SYSTEM (val);
8464         }
8465       ASET (attrs, coding_attr_utf_16_bom, bom);
8466
8467       endian = args[coding_arg_utf16_endian];
8468       CHECK_SYMBOL (endian);
8469       if (NILP (endian))
8470         endian = Qbig;
8471       else if (! EQ (endian, Qbig) && ! EQ (endian, Qlittle))
8472         error ("Invalid endian: %s", SDATA (SYMBOL_NAME (endian)));
8473       ASET (attrs, coding_attr_utf_16_endian, endian);
8474
8475       category = (CONSP (bom)
8476                   ? coding_category_utf_16_auto
8477                   : NILP (bom)
8478                   ? (EQ (endian, Qbig)
8479                      ? coding_category_utf_16_be_nosig
8480                      : coding_category_utf_16_le_nosig)
8481                   : (EQ (endian, Qbig)
8482                      ? coding_category_utf_16_be
8483                      : coding_category_utf_16_le));
8484     }
8485   else if (EQ (coding_type, Qiso_2022))
8486     {
8487       Lisp_Object initial, reg_usage, request, flags;
8488       int i;
8489
8490       if (nargs < coding_arg_iso2022_max)
8491         goto short_args;
8492
8493       initial = Fcopy_sequence (args[coding_arg_iso2022_initial]);
8494       CHECK_VECTOR (initial);
8495       for (i = 0; i < 4; i++)
8496         {
8497           val = Faref (initial, make_number (i));
8498           if (! NILP (val))
8499             {
8500               struct charset *charset;
8501
8502               CHECK_CHARSET_GET_CHARSET (val, charset);
8503               ASET (initial, i, make_number (CHARSET_ID (charset)));
8504               if (i == 0 && CHARSET_ASCII_COMPATIBLE_P (charset))
8505                 CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
8506             }
8507           else
8508             ASET (initial, i, make_number (-1));
8509         }
8510
8511       reg_usage = args[coding_arg_iso2022_reg_usage];
8512       CHECK_CONS (reg_usage);
8513       CHECK_NUMBER_CAR (reg_usage);
8514       CHECK_NUMBER_CDR (reg_usage);
8515
8516       request = Fcopy_sequence (args[coding_arg_iso2022_request]);
8517       for (tail = request; ! NILP (tail); tail = Fcdr (tail))
8518         {
8519           int id;
8520           Lisp_Object tmp;
8521
8522           val = Fcar (tail);
8523           CHECK_CONS (val);
8524           tmp = XCAR (val);
8525           CHECK_CHARSET_GET_ID (tmp, id);
8526           CHECK_NATNUM_CDR (val);
8527           if (XINT (XCDR (val)) >= 4)
8528             error ("Invalid graphic register number: %d", XINT (XCDR (val)));
8529           XSETCAR (val, make_number (id));
8530         }
8531
8532       flags = args[coding_arg_iso2022_flags];
8533       CHECK_NATNUM (flags);
8534       i = XINT (flags);
8535       if (EQ (args[coding_arg_charset_list], Qiso_2022))
8536         flags = make_number (i | CODING_ISO_FLAG_FULL_SUPPORT);
8537
8538       ASET (attrs, coding_attr_iso_initial, initial);
8539       ASET (attrs, coding_attr_iso_usage, reg_usage);
8540       ASET (attrs, coding_attr_iso_request, request);
8541       ASET (attrs, coding_attr_iso_flags, flags);
8542       setup_iso_safe_charsets (attrs);
8543
8544       if (i & CODING_ISO_FLAG_SEVEN_BITS)
8545         category = ((i & (CODING_ISO_FLAG_LOCKING_SHIFT
8546                           | CODING_ISO_FLAG_SINGLE_SHIFT))
8547                     ? coding_category_iso_7_else
8548                     : EQ (args[coding_arg_charset_list], Qiso_2022)
8549                     ? coding_category_iso_7
8550                     : coding_category_iso_7_tight);
8551       else
8552         {
8553           int id = XINT (AREF (initial, 1));
8554
8555           category = (((i & CODING_ISO_FLAG_LOCKING_SHIFT)
8556                        || EQ (args[coding_arg_charset_list], Qiso_2022)
8557                        || id < 0)
8558                       ? coding_category_iso_8_else
8559                       : (CHARSET_DIMENSION (CHARSET_FROM_ID (id)) == 1)
8560                       ? coding_category_iso_8_1
8561                       : coding_category_iso_8_2);
8562         }
8563       if (category != coding_category_iso_8_1
8564           && category != coding_category_iso_8_2)
8565         CODING_ATTR_ASCII_COMPAT (attrs) = Qnil;
8566     }
8567   else if (EQ (coding_type, Qemacs_mule))
8568     {
8569       if (EQ (args[coding_arg_charset_list], Qemacs_mule))
8570         ASET (attrs, coding_attr_emacs_mule_full, Qt);
8571       CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
8572       category = coding_category_emacs_mule;
8573     }
8574   else if (EQ (coding_type, Qshift_jis))
8575     {
8576
8577       struct charset *charset;
8578
8579       if (XINT (Flength (charset_list)) != 3
8580           && XINT (Flength (charset_list)) != 4)
8581         error ("There should be three or four charsets");
8582
8583       charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
8584       if (CHARSET_DIMENSION (charset) != 1)
8585         error ("Dimension of charset %s is not one",
8586                SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
8587       if (CHARSET_ASCII_COMPATIBLE_P (charset))
8588         CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
8589
8590       charset_list = XCDR (charset_list);
8591       charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
8592       if (CHARSET_DIMENSION (charset) != 1)
8593         error ("Dimension of charset %s is not one",
8594                SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
8595
8596       charset_list = XCDR (charset_list);
8597       charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
8598       if (CHARSET_DIMENSION (charset) != 2)
8599         error ("Dimension of charset %s is not two",
8600                SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
8601
8602       charset_list = XCDR (charset_list);
8603       if (! NILP (charset_list))
8604         {
8605           charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
8606           if (CHARSET_DIMENSION (charset) != 2)
8607             error ("Dimension of charset %s is not two",
8608                    SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
8609         }
8610
8611       category = coding_category_sjis;
8612       Vsjis_coding_system = name;
8613     }
8614   else if (EQ (coding_type, Qbig5))
8615     {
8616       struct charset *charset;
8617
8618       if (XINT (Flength (charset_list)) != 2)
8619         error ("There should be just two charsets");
8620
8621       charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
8622       if (CHARSET_DIMENSION (charset) != 1)
8623         error ("Dimension of charset %s is not one",
8624                SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
8625       if (CHARSET_ASCII_COMPATIBLE_P (charset))
8626         CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
8627
8628       charset_list = XCDR (charset_list);
8629       charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
8630       if (CHARSET_DIMENSION (charset) != 2)
8631         error ("Dimension of charset %s is not two",
8632                SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
8633
8634       category = coding_category_big5;
8635       Vbig5_coding_system = name;
8636     }
8637   else if (EQ (coding_type, Qraw_text))
8638     {
8639       category = coding_category_raw_text;
8640       CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
8641     }
8642   else if (EQ (coding_type, Qutf_8))
8643     {
8644       category = coding_category_utf_8;
8645       CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
8646     }
8647   else if (EQ (coding_type, Qundecided))
8648     category = coding_category_undecided;
8649   else
8650     error ("Invalid coding system type: %s",
8651            SDATA (SYMBOL_NAME (coding_type)));
8652
8653   CODING_ATTR_CATEGORY (attrs) = make_number (category);
8654   CODING_ATTR_PLIST (attrs)
8655     = Fcons (QCcategory, Fcons (AREF (Vcoding_category_table, category),
8656                                 CODING_ATTR_PLIST (attrs)));
8657
8658   eol_type = args[coding_arg_eol_type];
8659   if (! NILP (eol_type)
8660       && ! EQ (eol_type, Qunix)
8661       && ! EQ (eol_type, Qdos)
8662       && ! EQ (eol_type, Qmac))
8663     error ("Invalid eol-type");
8664
8665   aliases = Fcons (name, Qnil);
8666
8667   if (NILP (eol_type))
8668     {
8669       eol_type = make_subsidiaries (name);
8670       for (i = 0; i < 3; i++)
8671         {
8672           Lisp_Object this_spec, this_name, this_aliases, this_eol_type;
8673
8674           this_name = AREF (eol_type, i);
8675           this_aliases = Fcons (this_name, Qnil);
8676           this_eol_type = (i == 0 ? Qunix : i == 1 ? Qdos : Qmac);
8677           this_spec = Fmake_vector (make_number (3), attrs);
8678           ASET (this_spec, 1, this_aliases);
8679           ASET (this_spec, 2, this_eol_type);
8680           Fputhash (this_name, this_spec, Vcoding_system_hash_table);
8681           Vcoding_system_list = Fcons (this_name, Vcoding_system_list);
8682           Vcoding_system_alist = Fcons (Fcons (Fsymbol_name (this_name), Qnil),
8683                                         Vcoding_system_alist);
8684         }
8685     }
8686
8687   spec_vec = Fmake_vector (make_number (3), attrs);
8688   ASET (spec_vec, 1, aliases);
8689   ASET (spec_vec, 2, eol_type);
8690
8691   Fputhash (name, spec_vec, Vcoding_system_hash_table);
8692   Vcoding_system_list = Fcons (name, Vcoding_system_list);
8693   Vcoding_system_alist = Fcons (Fcons (Fsymbol_name (name), Qnil),
8694                                 Vcoding_system_alist);
8695
8696   {
8697     int id = coding_categories[category].id;
8698
8699     if (id < 0 || EQ (name, CODING_ID_NAME (id)))
8700       setup_coding_system (name, &coding_categories[category]);
8701   }
8702
8703   return Qnil;
8704
8705  short_args:
8706   return Fsignal (Qwrong_number_of_arguments,
8707                   Fcons (intern ("define-coding-system-internal"),
8708                          make_number (nargs)));
8709 }
8710
8711
8712 DEFUN ("coding-system-put", Fcoding_system_put, Scoding_system_put,
8713        3, 3, 0,
8714        doc: /* Change value in CODING-SYSTEM's property list PROP to VAL.  */)
8715   (coding_system, prop, val)
8716      Lisp_Object coding_system, prop, val;
8717 {
8718   Lisp_Object spec, attrs, plist;
8719
8720   CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
8721   attrs = AREF (spec, 0);
8722   if (EQ (prop, QCmnemonic))
8723     {
8724       if (! STRINGP (val))
8725         CHECK_CHARACTER (val);
8726       CODING_ATTR_MNEMONIC (attrs) = val;
8727     }
8728   else if (EQ (prop, QCdefalut_char))
8729     {
8730       if (NILP (val))
8731         val = make_number (' ');
8732       else
8733         CHECK_CHARACTER (val);
8734       CODING_ATTR_DEFAULT_CHAR (attrs) = val;
8735     }
8736   else if (EQ (prop, QCdecode_translation_table))
8737     {
8738       if (! CHAR_TABLE_P (val) && ! CONSP (val))
8739         CHECK_SYMBOL (val);
8740       CODING_ATTR_DECODE_TBL (attrs) = val;
8741     }
8742   else if (EQ (prop, QCencode_translation_table))
8743     {
8744       if (! CHAR_TABLE_P (val) && ! CONSP (val))
8745         CHECK_SYMBOL (val);
8746       CODING_ATTR_ENCODE_TBL (attrs) = val;
8747     }
8748   else if (EQ (prop, QCpost_read_conversion))
8749     {
8750       CHECK_SYMBOL (val);
8751       CODING_ATTR_POST_READ (attrs) = val;
8752     }
8753   else if (EQ (prop, QCpre_write_conversion))
8754     {
8755       CHECK_SYMBOL (val);
8756       CODING_ATTR_PRE_WRITE (attrs) = val;
8757     }
8758
8759   CODING_ATTR_PLIST (attrs)
8760     = Fplist_put (CODING_ATTR_PLIST (attrs), prop, val);
8761   return val;
8762 }
8763
8764
8765 DEFUN ("define-coding-system-alias", Fdefine_coding_system_alias,
8766        Sdefine_coding_system_alias, 2, 2, 0,
8767        doc: /* Define ALIAS as an alias for CODING-SYSTEM.  */)
8768      (alias, coding_system)
8769      Lisp_Object alias, coding_system;
8770 {
8771   Lisp_Object spec, aliases, eol_type;
8772
8773   CHECK_SYMBOL (alias);
8774   CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
8775   aliases = AREF (spec, 1);
8776   /* ALISES should be a list of length more than zero, and the first
8777      element is a base coding system.  Append ALIAS at the tail of the
8778      list.  */
8779   while (!NILP (XCDR (aliases)))
8780     aliases = XCDR (aliases);
8781   XSETCDR (aliases, Fcons (alias, Qnil));
8782
8783   eol_type = AREF (spec, 2);
8784   if (VECTORP (eol_type))
8785     {
8786       Lisp_Object subsidiaries;
8787       int i;
8788
8789       subsidiaries = make_subsidiaries (alias);
8790       for (i = 0; i < 3; i++)
8791         Fdefine_coding_system_alias (AREF (subsidiaries, i),
8792                                      AREF (eol_type, i));
8793     }
8794
8795   Fputhash (alias, spec, Vcoding_system_hash_table);
8796   Vcoding_system_list = Fcons (alias, Vcoding_system_list);
8797   Vcoding_system_alist = Fcons (Fcons (Fsymbol_name (alias), Qnil),
8798                                 Vcoding_system_alist);
8799
8800   return Qnil;
8801 }
8802
8803 DEFUN ("coding-system-base", Fcoding_system_base, Scoding_system_base,
8804        1, 1, 0,
8805        doc: /* Return the base of CODING-SYSTEM.
8806 Any alias or subsidiary coding system is not a base coding system.  */)
8807   (coding_system)
8808      Lisp_Object coding_system;
8809 {
8810   Lisp_Object spec, attrs;
8811
8812   if (NILP (coding_system))
8813     return (Qno_conversion);
8814   CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
8815   attrs = AREF (spec, 0);
8816   return CODING_ATTR_BASE_NAME (attrs);
8817 }
8818
8819 DEFUN ("coding-system-plist", Fcoding_system_plist, Scoding_system_plist,
8820        1, 1, 0,
8821        doc: "Return the property list of CODING-SYSTEM.")
8822      (coding_system)
8823      Lisp_Object coding_system;
8824 {
8825   Lisp_Object spec, attrs;
8826
8827   if (NILP (coding_system))
8828     coding_system = Qno_conversion;
8829   CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
8830   attrs = AREF (spec, 0);
8831   return CODING_ATTR_PLIST (attrs);
8832 }
8833
8834
8835 DEFUN ("coding-system-aliases", Fcoding_system_aliases, Scoding_system_aliases,
8836        1, 1, 0,
8837        doc: /* Return the list of aliases of CODING-SYSTEM.  */)
8838      (coding_system)
8839      Lisp_Object coding_system;
8840 {
8841   Lisp_Object spec;
8842
8843   if (NILP (coding_system))
8844     coding_system = Qno_conversion;
8845   CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
8846   return AREF (spec, 1);
8847 }
8848
8849 DEFUN ("coding-system-eol-type", Fcoding_system_eol_type,
8850        Scoding_system_eol_type, 1, 1, 0,
8851        doc: /* Return eol-type of CODING-SYSTEM.
8852 An eol-type is integer 0, 1, 2, or a vector of coding systems.
8853
8854 Integer values 0, 1, and 2 indicate a format of end-of-line; LF, CRLF,
8855 and CR respectively.
8856
8857 A vector value indicates that a format of end-of-line should be
8858 detected automatically.  Nth element of the vector is the subsidiary
8859 coding system whose eol-type is N.  */)
8860      (coding_system)
8861      Lisp_Object coding_system;
8862 {
8863   Lisp_Object spec, eol_type;
8864   int n;
8865
8866   if (NILP (coding_system))
8867     coding_system = Qno_conversion;
8868   if (! CODING_SYSTEM_P (coding_system))
8869     return Qnil;
8870   spec = CODING_SYSTEM_SPEC (coding_system);
8871   eol_type = AREF (spec, 2);
8872   if (VECTORP (eol_type))
8873     return Fcopy_sequence (eol_type);
8874   n = EQ (eol_type, Qunix) ? 0 : EQ (eol_type, Qdos) ? 1 : 2;
8875   return make_number (n);
8876 }
8877
8878 #endif /* emacs */
8879
8880 \f
8881 /*** 9. Post-amble ***/
8882
8883 void
8884 init_coding_once ()
8885 {
8886   int i;
8887
8888   for (i = 0; i < coding_category_max; i++)
8889     {
8890       coding_categories[i].id = -1;
8891       coding_priorities[i] = i;
8892     }
8893
8894   /* ISO2022 specific initialize routine.  */
8895   for (i = 0; i < 0x20; i++)
8896     iso_code_class[i] = ISO_control_0;
8897   for (i = 0x21; i < 0x7F; i++)
8898     iso_code_class[i] = ISO_graphic_plane_0;
8899   for (i = 0x80; i < 0xA0; i++)
8900     iso_code_class[i] = ISO_control_1;
8901   for (i = 0xA1; i < 0xFF; i++)
8902     iso_code_class[i] = ISO_graphic_plane_1;
8903   iso_code_class[0x20] = iso_code_class[0x7F] = ISO_0x20_or_0x7F;
8904   iso_code_class[0xA0] = iso_code_class[0xFF] = ISO_0xA0_or_0xFF;
8905   iso_code_class[ISO_CODE_SO] = ISO_shift_out;
8906   iso_code_class[ISO_CODE_SI] = ISO_shift_in;
8907   iso_code_class[ISO_CODE_SS2_7] = ISO_single_shift_2_7;
8908   iso_code_class[ISO_CODE_ESC] = ISO_escape;
8909   iso_code_class[ISO_CODE_SS2] = ISO_single_shift_2;
8910   iso_code_class[ISO_CODE_SS3] = ISO_single_shift_3;
8911   iso_code_class[ISO_CODE_CSI] = ISO_control_sequence_introducer;
8912
8913   for (i = 0; i < 256; i++)
8914     {
8915       emacs_mule_bytes[i] = 1;
8916     }
8917   emacs_mule_bytes[EMACS_MULE_LEADING_CODE_PRIVATE_11] = 3;
8918   emacs_mule_bytes[EMACS_MULE_LEADING_CODE_PRIVATE_12] = 3;
8919   emacs_mule_bytes[EMACS_MULE_LEADING_CODE_PRIVATE_21] = 4;
8920   emacs_mule_bytes[EMACS_MULE_LEADING_CODE_PRIVATE_22] = 4;
8921 }
8922
8923 #ifdef emacs
8924
8925 void
8926 syms_of_coding ()
8927 {
8928   staticpro (&Vcoding_system_hash_table);
8929   {
8930     Lisp_Object args[2];
8931     args[0] = QCtest;
8932     args[1] = Qeq;
8933     Vcoding_system_hash_table = Fmake_hash_table (2, args);
8934   }
8935
8936   staticpro (&Vsjis_coding_system);
8937   Vsjis_coding_system = Qnil;
8938
8939   staticpro (&Vbig5_coding_system);
8940   Vbig5_coding_system = Qnil;
8941
8942   staticpro (&Vcode_conversion_reused_workbuf);
8943   Vcode_conversion_reused_workbuf = Qnil;
8944
8945   staticpro (&Vcode_conversion_workbuf_name);
8946   Vcode_conversion_workbuf_name = build_string (" *code-conversion-work*");
8947
8948   reused_workbuf_in_use = 0;
8949
8950   DEFSYM (Qcharset, "charset");
8951   DEFSYM (Qtarget_idx, "target-idx");
8952   DEFSYM (Qcoding_system_history, "coding-system-history");
8953   Fset (Qcoding_system_history, Qnil);
8954
8955   /* Target FILENAME is the first argument.  */
8956   Fput (Qinsert_file_contents, Qtarget_idx, make_number (0));
8957   /* Target FILENAME is the third argument.  */
8958   Fput (Qwrite_region, Qtarget_idx, make_number (2));
8959
8960   DEFSYM (Qcall_process, "call-process");
8961   /* Target PROGRAM is the first argument.  */
8962   Fput (Qcall_process, Qtarget_idx, make_number (0));
8963
8964   DEFSYM (Qcall_process_region, "call-process-region");
8965   /* Target PROGRAM is the third argument.  */
8966   Fput (Qcall_process_region, Qtarget_idx, make_number (2));
8967
8968   DEFSYM (Qstart_process, "start-process");
8969   /* Target PROGRAM is the third argument.  */
8970   Fput (Qstart_process, Qtarget_idx, make_number (2));
8971
8972   DEFSYM (Qopen_network_stream, "open-network-stream");
8973   /* Target SERVICE is the fourth argument.  */
8974   Fput (Qopen_network_stream, Qtarget_idx, make_number (3));
8975
8976   DEFSYM (Qcoding_system, "coding-system");
8977   DEFSYM (Qcoding_aliases, "coding-aliases");
8978
8979   DEFSYM (Qeol_type, "eol-type");
8980   DEFSYM (Qunix, "unix");
8981   DEFSYM (Qdos, "dos");
8982
8983   DEFSYM (Qbuffer_file_coding_system, "buffer-file-coding-system");
8984   DEFSYM (Qpost_read_conversion, "post-read-conversion");
8985   DEFSYM (Qpre_write_conversion, "pre-write-conversion");
8986   DEFSYM (Qdefault_char, "default-char");
8987   DEFSYM (Qundecided, "undecided");
8988   DEFSYM (Qno_conversion, "no-conversion");
8989   DEFSYM (Qraw_text, "raw-text");
8990
8991   DEFSYM (Qiso_2022, "iso-2022");
8992
8993   DEFSYM (Qutf_8, "utf-8");
8994   DEFSYM (Qutf_8_emacs, "utf-8-emacs");
8995
8996   DEFSYM (Qutf_16, "utf-16");
8997   DEFSYM (Qbig, "big");
8998   DEFSYM (Qlittle, "little");
8999
9000   DEFSYM (Qshift_jis, "shift-jis");
9001   DEFSYM (Qbig5, "big5");
9002
9003   DEFSYM (Qcoding_system_p, "coding-system-p");
9004
9005   DEFSYM (Qcoding_system_error, "coding-system-error");
9006   Fput (Qcoding_system_error, Qerror_conditions,
9007         Fcons (Qcoding_system_error, Fcons (Qerror, Qnil)));
9008   Fput (Qcoding_system_error, Qerror_message,
9009         build_string ("Invalid coding system"));
9010
9011   /* Intern this now in case it isn't already done.
9012      Setting this variable twice is harmless.
9013      But don't staticpro it here--that is done in alloc.c.  */
9014   Qchar_table_extra_slots = intern ("char-table-extra-slots");
9015
9016   DEFSYM (Qtranslation_table, "translation-table");
9017   Fput (Qtranslation_table, Qchar_table_extra_slots, make_number (2));
9018   DEFSYM (Qtranslation_table_id, "translation-table-id");
9019   DEFSYM (Qtranslation_table_for_decode, "translation-table-for-decode");
9020   DEFSYM (Qtranslation_table_for_encode, "translation-table-for-encode");
9021
9022   DEFSYM (Qvalid_codes, "valid-codes");
9023
9024   DEFSYM (Qemacs_mule, "emacs-mule");
9025
9026   DEFSYM (QCcategory, ":category");
9027   DEFSYM (QCmnemonic, ":mnemonic");
9028   DEFSYM (QCdefalut_char, ":default-char");
9029   DEFSYM (QCdecode_translation_table, ":decode-translation-table");
9030   DEFSYM (QCencode_translation_table, ":encode-translation-table");
9031   DEFSYM (QCpost_read_conversion, ":post-read-conversion");
9032   DEFSYM (QCpre_write_conversion, ":pre-write-conversion");
9033
9034   Vcoding_category_table
9035     = Fmake_vector (make_number (coding_category_max), Qnil);
9036   staticpro (&Vcoding_category_table);
9037   /* Followings are target of code detection.  */
9038   ASET (Vcoding_category_table, coding_category_iso_7,
9039         intern ("coding-category-iso-7"));
9040   ASET (Vcoding_category_table, coding_category_iso_7_tight,
9041         intern ("coding-category-iso-7-tight"));
9042   ASET (Vcoding_category_table, coding_category_iso_8_1,
9043         intern ("coding-category-iso-8-1"));
9044   ASET (Vcoding_category_table, coding_category_iso_8_2,
9045         intern ("coding-category-iso-8-2"));
9046   ASET (Vcoding_category_table, coding_category_iso_7_else,
9047         intern ("coding-category-iso-7-else"));
9048   ASET (Vcoding_category_table, coding_category_iso_8_else,
9049         intern ("coding-category-iso-8-else"));
9050   ASET (Vcoding_category_table, coding_category_utf_8,
9051         intern ("coding-category-utf-8"));
9052   ASET (Vcoding_category_table, coding_category_utf_16_be,
9053         intern ("coding-category-utf-16-be"));
9054   ASET (Vcoding_category_table, coding_category_utf_16_auto,
9055         intern ("coding-category-utf-16-auto"));
9056   ASET (Vcoding_category_table, coding_category_utf_16_le,
9057         intern ("coding-category-utf-16-le"));
9058   ASET (Vcoding_category_table, coding_category_utf_16_be_nosig,
9059         intern ("coding-category-utf-16-be-nosig"));
9060   ASET (Vcoding_category_table, coding_category_utf_16_le_nosig,
9061         intern ("coding-category-utf-16-le-nosig"));
9062   ASET (Vcoding_category_table, coding_category_charset,
9063         intern ("coding-category-charset"));
9064   ASET (Vcoding_category_table, coding_category_sjis,
9065         intern ("coding-category-sjis"));
9066   ASET (Vcoding_category_table, coding_category_big5,
9067         intern ("coding-category-big5"));
9068   ASET (Vcoding_category_table, coding_category_ccl,
9069         intern ("coding-category-ccl"));
9070   ASET (Vcoding_category_table, coding_category_emacs_mule,
9071         intern ("coding-category-emacs-mule"));
9072   /* Followings are NOT target of code detection.  */
9073   ASET (Vcoding_category_table, coding_category_raw_text,
9074         intern ("coding-category-raw-text"));
9075   ASET (Vcoding_category_table, coding_category_undecided,
9076         intern ("coding-category-undecided"));
9077
9078   DEFSYM (Qinsufficient_source, "insufficient-source");
9079   DEFSYM (Qinconsistent_eol, "inconsistent-eol");
9080   DEFSYM (Qinvalid_source, "invalid-source");
9081   DEFSYM (Qinterrupted, "interrupted");
9082   DEFSYM (Qinsufficient_memory, "insufficient-memory");
9083
9084   defsubr (&Scoding_system_p);
9085   defsubr (&Sread_coding_system);
9086   defsubr (&Sread_non_nil_coding_system);
9087   defsubr (&Scheck_coding_system);
9088   defsubr (&Sdetect_coding_region);
9089   defsubr (&Sdetect_coding_string);
9090   defsubr (&Sfind_coding_systems_region_internal);
9091   defsubr (&Sunencodable_char_position);
9092   defsubr (&Scheck_coding_systems_region);
9093   defsubr (&Sdecode_coding_region);
9094   defsubr (&Sencode_coding_region);
9095   defsubr (&Sdecode_coding_string);
9096   defsubr (&Sencode_coding_string);
9097   defsubr (&Sdecode_sjis_char);
9098   defsubr (&Sencode_sjis_char);
9099   defsubr (&Sdecode_big5_char);
9100   defsubr (&Sencode_big5_char);
9101   defsubr (&Sset_terminal_coding_system_internal);
9102   defsubr (&Sset_safe_terminal_coding_system_internal);
9103   defsubr (&Sterminal_coding_system);
9104   defsubr (&Sset_keyboard_coding_system_internal);
9105   defsubr (&Skeyboard_coding_system);
9106   defsubr (&Sfind_operation_coding_system);
9107   defsubr (&Sset_coding_system_priority);
9108   defsubr (&Sdefine_coding_system_internal);
9109   defsubr (&Sdefine_coding_system_alias);
9110   defsubr (&Scoding_system_put);
9111   defsubr (&Scoding_system_base);
9112   defsubr (&Scoding_system_plist);
9113   defsubr (&Scoding_system_aliases);
9114   defsubr (&Scoding_system_eol_type);
9115   defsubr (&Scoding_system_priority_list);
9116
9117   DEFVAR_LISP ("coding-system-list", &Vcoding_system_list,
9118                doc: /* List of coding systems.
9119
9120 Do not alter the value of this variable manually.  This variable should be
9121 updated by the functions `define-coding-system' and
9122 `define-coding-system-alias'.  */);
9123   Vcoding_system_list = Qnil;
9124
9125   DEFVAR_LISP ("coding-system-alist", &Vcoding_system_alist,
9126                doc: /* Alist of coding system names.
9127 Each element is one element list of coding system name.
9128 This variable is given to `completing-read' as TABLE argument.
9129
9130 Do not alter the value of this variable manually.  This variable should be
9131 updated by the functions `make-coding-system' and
9132 `define-coding-system-alias'.  */);
9133   Vcoding_system_alist = Qnil;
9134
9135   DEFVAR_LISP ("coding-category-list", &Vcoding_category_list,
9136                doc: /* List of coding-categories (symbols) ordered by priority.
9137
9138 On detecting a coding system, Emacs tries code detection algorithms
9139 associated with each coding-category one by one in this order.  When
9140 one algorithm agrees with a byte sequence of source text, the coding
9141 system bound to the corresponding coding-category is selected.  */);
9142   {
9143     int i;
9144
9145     Vcoding_category_list = Qnil;
9146     for (i = coding_category_max - 1; i >= 0; i--)
9147       Vcoding_category_list
9148         = Fcons (XVECTOR (Vcoding_category_table)->contents[i],
9149                  Vcoding_category_list);
9150   }
9151
9152   DEFVAR_LISP ("coding-system-for-read", &Vcoding_system_for_read,
9153                doc: /* Specify the coding system for read operations.
9154 It is useful to bind this variable with `let', but do not set it globally.
9155 If the value is a coding system, it is used for decoding on read operation.
9156 If not, an appropriate element is used from one of the coding system alists:
9157 There are three such tables, `file-coding-system-alist',
9158 `process-coding-system-alist', and `network-coding-system-alist'.  */);
9159   Vcoding_system_for_read = Qnil;
9160
9161   DEFVAR_LISP ("coding-system-for-write", &Vcoding_system_for_write,
9162                doc: /* Specify the coding system for write operations.
9163 Programs bind this variable with `let', but you should not set it globally.
9164 If the value is a coding system, it is used for encoding of output,
9165 when writing it to a file and when sending it to a file or subprocess.
9166
9167 If this does not specify a coding system, an appropriate element
9168 is used from one of the coding system alists:
9169 There are three such tables, `file-coding-system-alist',
9170 `process-coding-system-alist', and `network-coding-system-alist'.
9171 For output to files, if the above procedure does not specify a coding system,
9172 the value of `buffer-file-coding-system' is used.  */);
9173   Vcoding_system_for_write = Qnil;
9174
9175   DEFVAR_LISP ("last-coding-system-used", &Vlast_coding_system_used,
9176                doc: /*
9177 Coding system used in the latest file or process I/O.  */);
9178   Vlast_coding_system_used = Qnil;
9179
9180   DEFVAR_LISP ("last-code-conversion-error", &Vlast_code_conversion_error,
9181                doc: /*
9182 Error status of the last code conversion.
9183
9184 When an error was detected in the last code conversion, this variable
9185 is set to one of the following symbols.
9186   `insufficient-source'
9187   `inconsistent-eol'
9188   `invalid-source'
9189   `interrupted'
9190   `insufficient-memory'
9191 When no error was detected, the value doesn't change.  So, to check
9192 the error status of a code conversion by this variable, you must
9193 explicitly set this variable to nil before performing code
9194 conversion.  */);
9195   Vlast_code_conversion_error = Qnil;
9196
9197   DEFVAR_BOOL ("inhibit-eol-conversion", &inhibit_eol_conversion,
9198                doc: /*
9199 *Non-nil means always inhibit code conversion of end-of-line format.
9200 See info node `Coding Systems' and info node `Text and Binary' concerning
9201 such conversion.  */);
9202   inhibit_eol_conversion = 0;
9203
9204   DEFVAR_BOOL ("inherit-process-coding-system", &inherit_process_coding_system,
9205                doc: /*
9206 Non-nil means process buffer inherits coding system of process output.
9207 Bind it to t if the process output is to be treated as if it were a file
9208 read from some filesystem.  */);
9209   inherit_process_coding_system = 0;
9210
9211   DEFVAR_LISP ("file-coding-system-alist", &Vfile_coding_system_alist,
9212                doc: /*
9213 Alist to decide a coding system to use for a file I/O operation.
9214 The format is ((PATTERN . VAL) ...),
9215 where PATTERN is a regular expression matching a file name,
9216 VAL is a coding system, a cons of coding systems, or a function symbol.
9217 If VAL is a coding system, it is used for both decoding and encoding
9218 the file contents.
9219 If VAL is a cons of coding systems, the car part is used for decoding,
9220 and the cdr part is used for encoding.
9221 If VAL is a function symbol, the function must return a coding system
9222 or a cons of coding systems which are used as above.  The function gets
9223 the arguments with which `find-operation-coding-systems' was called.
9224
9225 See also the function `find-operation-coding-system'
9226 and the variable `auto-coding-alist'.  */);
9227   Vfile_coding_system_alist = Qnil;
9228
9229   DEFVAR_LISP ("process-coding-system-alist", &Vprocess_coding_system_alist,
9230                doc: /*
9231 Alist to decide a coding system to use for a process I/O operation.
9232 The format is ((PATTERN . VAL) ...),
9233 where PATTERN is a regular expression matching a program name,
9234 VAL is a coding system, a cons of coding systems, or a function symbol.
9235 If VAL is a coding system, it is used for both decoding what received
9236 from the program and encoding what sent to the program.
9237 If VAL is a cons of coding systems, the car part is used for decoding,
9238 and the cdr part is used for encoding.
9239 If VAL is a function symbol, the function must return a coding system
9240 or a cons of coding systems which are used as above.
9241
9242 See also the function `find-operation-coding-system'.  */);
9243   Vprocess_coding_system_alist = Qnil;
9244
9245   DEFVAR_LISP ("network-coding-system-alist", &Vnetwork_coding_system_alist,
9246                doc: /*
9247 Alist to decide a coding system to use for a network I/O operation.
9248 The format is ((PATTERN . VAL) ...),
9249 where PATTERN is a regular expression matching a network service name
9250 or is a port number to connect to,
9251 VAL is a coding system, a cons of coding systems, or a function symbol.
9252 If VAL is a coding system, it is used for both decoding what received
9253 from the network stream and encoding what sent to the network stream.
9254 If VAL is a cons of coding systems, the car part is used for decoding,
9255 and the cdr part is used for encoding.
9256 If VAL is a function symbol, the function must return a coding system
9257 or a cons of coding systems which are used as above.
9258
9259 See also the function `find-operation-coding-system'.  */);
9260   Vnetwork_coding_system_alist = Qnil;
9261
9262   DEFVAR_LISP ("locale-coding-system", &Vlocale_coding_system,
9263                doc: /* Coding system to use with system messages.
9264 Also used for decoding keyboard input on X Window system.  */);
9265   Vlocale_coding_system = Qnil;
9266
9267   /* The eol mnemonics are reset in startup.el system-dependently.  */
9268   DEFVAR_LISP ("eol-mnemonic-unix", &eol_mnemonic_unix,
9269                doc: /*
9270 *String displayed in mode line for UNIX-like (LF) end-of-line format.  */);
9271   eol_mnemonic_unix = build_string (":");
9272
9273   DEFVAR_LISP ("eol-mnemonic-dos", &eol_mnemonic_dos,
9274                doc: /*
9275 *String displayed in mode line for DOS-like (CRLF) end-of-line format.  */);
9276   eol_mnemonic_dos = build_string ("\\");
9277
9278   DEFVAR_LISP ("eol-mnemonic-mac", &eol_mnemonic_mac,
9279                doc: /*
9280 *String displayed in mode line for MAC-like (CR) end-of-line format.  */);
9281   eol_mnemonic_mac = build_string ("/");
9282
9283   DEFVAR_LISP ("eol-mnemonic-undecided", &eol_mnemonic_undecided,
9284                doc: /*
9285 *String displayed in mode line when end-of-line format is not yet determined.  */);
9286   eol_mnemonic_undecided = build_string (":");
9287
9288   DEFVAR_LISP ("enable-character-translation", &Venable_character_translation,
9289                doc: /*
9290 *Non-nil enables character translation while encoding and decoding.  */);
9291   Venable_character_translation = Qt;
9292
9293   DEFVAR_LISP ("standard-translation-table-for-decode",
9294                &Vstandard_translation_table_for_decode,
9295                doc: /* Table for translating characters while decoding.  */);
9296   Vstandard_translation_table_for_decode = Qnil;
9297
9298   DEFVAR_LISP ("standard-translation-table-for-encode",
9299                &Vstandard_translation_table_for_encode,
9300                doc: /* Table for translating characters while encoding.  */);
9301   Vstandard_translation_table_for_encode = Qnil;
9302
9303   DEFVAR_LISP ("charset-revision-table", &Vcharset_revision_table,
9304                doc: /* Alist of charsets vs revision numbers.
9305 While encoding, if a charset (car part of an element) is found,
9306 designate it with the escape sequence identifying revision (cdr part
9307 of the element).  */);
9308   Vcharset_revision_table = Qnil;
9309
9310   DEFVAR_LISP ("default-process-coding-system",
9311                &Vdefault_process_coding_system,
9312                doc: /* Cons of coding systems used for process I/O by default.
9313 The car part is used for decoding a process output,
9314 the cdr part is used for encoding a text to be sent to a process.  */);
9315   Vdefault_process_coding_system = Qnil;
9316
9317   DEFVAR_LISP ("latin-extra-code-table", &Vlatin_extra_code_table,
9318                doc: /*
9319 Table of extra Latin codes in the range 128..159 (inclusive).
9320 This is a vector of length 256.
9321 If Nth element is non-nil, the existence of code N in a file
9322 \(or output of subprocess) doesn't prevent it to be detected as
9323 a coding system of ISO 2022 variant which has a flag
9324 `accept-latin-extra-code' t (e.g. iso-latin-1) on reading a file
9325 or reading output of a subprocess.
9326 Only 128th through 159th elements has a meaning.  */);
9327   Vlatin_extra_code_table = Fmake_vector (make_number (256), Qnil);
9328
9329   DEFVAR_LISP ("select-safe-coding-system-function",
9330                &Vselect_safe_coding_system_function,
9331                doc: /*
9332 Function to call to select safe coding system for encoding a text.
9333
9334 If set, this function is called to force a user to select a proper
9335 coding system which can encode the text in the case that a default
9336 coding system used in each operation can't encode the text.
9337
9338 The default value is `select-safe-coding-system' (which see).  */);
9339   Vselect_safe_coding_system_function = Qnil;
9340
9341   DEFVAR_BOOL ("coding-system-require-warning",
9342                &coding_system_require_warning,
9343                doc: /* Internal use only.
9344 If non-nil, on writing a file, `select-safe-coding-system-function' is
9345 called even if `coding-system-for-write' is non-nil.  The command
9346 `universal-coding-system-argument' binds this variable to t temporarily.  */);
9347   coding_system_require_warning = 0;
9348
9349
9350   DEFVAR_BOOL ("inhibit-iso-escape-detection",
9351                &inhibit_iso_escape_detection,
9352                doc: /*
9353 If non-nil, Emacs ignores ISO2022's escape sequence on code detection.
9354
9355 By default, on reading a file, Emacs tries to detect how the text is
9356 encoded.  This code detection is sensitive to escape sequences.  If
9357 the sequence is valid as ISO2022, the code is determined as one of
9358 the ISO2022 encodings, and the file is decoded by the corresponding
9359 coding system (e.g. `iso-2022-7bit').
9360
9361 However, there may be a case that you want to read escape sequences in
9362 a file as is.  In such a case, you can set this variable to non-nil.
9363 Then, as the code detection ignores any escape sequences, no file is
9364 detected as encoded in some ISO2022 encoding.  The result is that all
9365 escape sequences become visible in a buffer.
9366
9367 The default value is nil, and it is strongly recommended not to change
9368 it.  That is because many Emacs Lisp source files that contain
9369 non-ASCII characters are encoded by the coding system `iso-2022-7bit'
9370 in Emacs's distribution, and they won't be decoded correctly on
9371 reading if you suppress escape sequence detection.
9372
9373 The other way to read escape sequences in a file without decoding is
9374 to explicitly specify some coding system that doesn't use ISO2022's
9375 escape sequence (e.g `latin-1') on reading by \\[universal-coding-system-argument].  */);
9376   inhibit_iso_escape_detection = 0;
9377
9378   DEFVAR_LISP ("translation-table-for-input", &Vtranslation_table_for_input,
9379                doc: /* Char table for translating self-inserting characters.
9380 This is applied to the result of input methods, not their input.  See also
9381 `keyboard-translate-table'.  */);
9382     Vtranslation_table_for_input = Qnil;
9383
9384   {
9385     Lisp_Object args[coding_arg_max];
9386     Lisp_Object plist[16];
9387     int i;
9388
9389     for (i = 0; i < coding_arg_max; i++)
9390       args[i] = Qnil;
9391
9392     plist[0] = intern (":name");
9393     plist[1] = args[coding_arg_name] = Qno_conversion;
9394     plist[2] = intern (":mnemonic");
9395     plist[3] = args[coding_arg_mnemonic] = make_number ('=');
9396     plist[4] = intern (":coding-type");
9397     plist[5] = args[coding_arg_coding_type] = Qraw_text;
9398     plist[6] = intern (":ascii-compatible-p");
9399     plist[7] = args[coding_arg_ascii_compatible_p] = Qt;
9400     plist[8] = intern (":default-char");
9401     plist[9] = args[coding_arg_default_char] = make_number (0);
9402     plist[10] = intern (":for-unibyte");
9403     plist[11] = args[coding_arg_for_unibyte] = Qt;
9404     plist[12] = intern (":docstring");
9405     plist[13] = build_string ("Do no conversion.\n\
9406 \n\
9407 When you visit a file with this coding, the file is read into a\n\
9408 unibyte buffer as is, thus each byte of a file is treated as a\n\
9409 character.");
9410     plist[14] = intern (":eol-type");
9411     plist[15] = args[coding_arg_eol_type] = Qunix;
9412     args[coding_arg_plist] = Flist (16, plist);
9413     Fdefine_coding_system_internal (coding_arg_max, args);
9414   }
9415
9416   setup_coding_system (Qno_conversion, &keyboard_coding);
9417   setup_coding_system (Qno_conversion, &terminal_coding);
9418   setup_coding_system (Qno_conversion, &safe_terminal_coding);
9419
9420   {
9421     int i;
9422
9423     for (i = 0; i < coding_category_max; i++)
9424       Fset (AREF (Vcoding_category_table, i), Qno_conversion);
9425   }
9426 }
9427
9428 char *
9429 emacs_strerror (error_number)
9430      int error_number;
9431 {
9432   char *str;
9433
9434   synchronize_system_messages_locale ();
9435   str = strerror (error_number);
9436
9437   if (! NILP (Vlocale_coding_system))
9438     {
9439       Lisp_Object dec = code_convert_string_norecord (build_string (str),
9440                                                       Vlocale_coding_system,
9441                                                       0);
9442       str = (char *) SDATA (dec);
9443     }
9444
9445   return str;
9446 }
9447
9448 #endif /* emacs */