src/coding.c

   1 /* Coding system handler (conversion, detection, etc).
   2    Copyright (C) 1995, 1997, 1998 Electrotechnical Laboratory, JAPAN.
   3      Licensed to the Free Software Foundation.
   4    Copyright (C) 2001, 2002 Free Software Foundation, Inc.
   5    Copyright (C) 2003
   6      National Institute of Advanced Industrial Science and Technology (AIST)
   7      Registration Number H13PRO009
   8
   9 This file is part of GNU Emacs.
  10
  11 GNU Emacs is free software; you can redistribute it and/or modify
  12 it under the terms of the GNU General Public License as published by
  13 the Free Software Foundation; either version 2, or (at your option)
  14 any later version.
  15
  16 GNU Emacs is distributed in the hope that it will be useful,
  17 but WITHOUT ANY WARRANTY; without even the implied warranty of
  18 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  19 GNU General Public License for more details.
  20
  21 You should have received a copy of the GNU General Public License
  22 along with GNU Emacs; see the file COPYING.  If not, write to
  23 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
  24 Boston, MA 02111-1307, USA.  */
  25
  26 /*** TABLE OF CONTENTS ***
  27
  28   0. General comments
  29   1. Preamble
  30   2. Emacs' internal format (emacs-utf-8) handlers
  31   3. UTF-8 handlers
  32   4. UTF-16 handlers
  33   5. Charset-base coding systems handlers
  34   6. emacs-mule (old Emacs' internal format) handlers
  35   7. ISO2022 handlers
  36   8. Shift-JIS and BIG5 handlers
  37   9. CCL handlers
  38   10. C library functions
  39   11. Emacs Lisp library functions
  40   12. Postamble
  41
  42 */
  43
  44 /*** 0. General comments ***
  45
  46
  47 CODING SYSTEM
  48
  49   A coding system is an object for an encoding mechanism that contains
  50   information about how to convert byte sequences to character
  51   sequences and vice versa.  When we say "decode", it means converting
  52   a byte sequence of a specific coding system into a character
  53   sequence that is represented by Emacs' internal coding system
  54   `emacs-utf-8', and when we say "encode", it means converting a
  55   character sequence of emacs-utf-8 to a byte sequence of a specific
  56   coding system.
  57
  58   In Emacs Lisp, a coding system is represented by a Lisp symbol.  In
  59   C level, a coding system is represented by a vector of attributes
  60   stored in the hash table Vcharset_hash_table.  The conversion from
  61   coding system symbol to attributes vector is done by looking up
  62   Vcharset_hash_table by the symbol.
  63
  64   Coding systems are classified into the following types depending on
  65   the encoding mechanism.  Here's a brief description of the types.
  66
  67   o UTF-8
  68
  69   o UTF-16
  70
  71   o Charset-base coding system
  72
  73   A coding system defined by one or more (coded) character sets.
  74   Decoding and encoding are done by a code converter defined for each
  75   character set.
  76
  77   o Old Emacs internal format (emacs-mule)
  78
  79   The coding system adopted by old versions of Emacs (20 and 21).
  80
  81   o ISO2022-base coding system
  82
  83   The most famous coding system for multiple character sets.  X's
  84   Compound Text, various EUCs (Extended Unix Code), and coding systems
  85   used in the Internet communication such as ISO-2022-JP are all
  86   variants of ISO2022.
  87
  88   o SJIS (or Shift-JIS or MS-Kanji-Code)
  89
  90   A coding system to encode character sets: ASCII, JISX0201, and
  91   JISX0208.  Widely used for PC's in Japan.  Details are described in
  92   section 8.
  93
  94   o BIG5
  95
  96   A coding system to encode character sets: ASCII and Big5.  Widely
  97   used for Chinese (mainly in Taiwan and Hong Kong).  Details are
  98   described in section 8.  In this file, when we write "big5" (all
  99   lowercase), we mean the coding system, and when we write "Big5"
 100   (capitalized), we mean the character set.
 101
 102   o CCL
 103
 104   If a user wants to decode/encode text encoded in a coding system
 105   not listed above, he can supply a decoder and an encoder for it in
 106   CCL (Code Conversion Language) programs.  Emacs executes the CCL
 107   program while decoding/encoding.
 108
 109   o Raw-text
 110
 111   A coding system for text containing raw eight-bit data.  Emacs
 112   treats each byte of source text as a character (except for
 113   end-of-line conversion).
 114
 115   o No-conversion
 116
 117   Like raw text, but don't do end-of-line conversion.
 118
 119
 120 END-OF-LINE FORMAT
 121
 122   How text end-of-line is encoded depends on operating system.  For
 123   instance, Unix's format is just one byte of LF (line-feed) code,
 124   whereas DOS's format is two-byte sequence of `carriage-return' and
 125   `line-feed' codes.  MacOS's format is usually one byte of
 126   `carriage-return'.
 127
 128   Since text character encoding and end-of-line encoding are
 129   independent, any coding system described above can take any format
 130   of end-of-line (except for no-conversion).
 131
 132 STRUCT CODING_SYSTEM
 133
 134   Before using a coding system for code conversion (i.e. decoding and
 135   encoding), we setup a structure of type `struct coding_system'.
 136   This structure keeps various information about a specific code
 137   conversion (e.g. the location of source and destination data).
 138
 139 */
 140
 141 /* COMMON MACROS */
 142
 143
 144 /*** GENERAL NOTES on `detect_coding_XXX ()' functions ***
 145
 146   These functions check if a byte sequence specified as a source in
 147   CODING conforms to the format of XXX, and update the members of
 148   DETECT_INFO.
 149
 150   Return 1 if the byte sequence conforms to XXX, otherwise return 0.
 151
 152   Below is the template of these functions.  */
 153
 154 #if 0
 155 static int
 156 detect_coding_XXX (coding, detect_info)
 157      struct coding_system *coding;
 158      struct coding_detection_info *detect_info;
 159 {
 160   unsigned char *src = coding->source;
 161   unsigned char *src_end = coding->source + coding->src_bytes;
 162   int multibytep = coding->src_multibyte;
 163   int consumed_chars = 0;
 164   int found = 0;
 165   ...;
 166
 167   while (1)
 168     {
 169       /* Get one byte from the source.  If the souce is exausted, jump
 170          to no_more_source:.  */
 171       ONE_MORE_BYTE (c);
 172
 173       if (! __C_conforms_to_XXX___ (c))
 174         break;
 175       if (! __C_strongly_suggests_XXX__ (c))
 176         found = CATEGORY_MASK_XXX;
 177     }
 178   /* The byte sequence is invalid for XXX.  */
 179   detect_info->rejected |= CATEGORY_MASK_XXX;
 180   return 0;
 181
 182  no_more_source:
 183   /* The source exausted successfully.  */
 184   detect_info->found |= found;
 185   return 1;
 186 }
 187 #endif
 188
 189 /*** GENERAL NOTES on `decode_coding_XXX ()' functions ***
 190
 191   These functions decode a byte sequence specified as a source by
 192   CODING.  The resulting multibyte text goes to a place pointed to by
 193   CODING->charbuf, the length of which should not exceed
 194   CODING->charbuf_size;
 195
 196   These functions set the information of original and decoded texts in
 197   CODING->consumed, CODING->consumed_char, and CODING->charbuf_used.
 198   They also set CODING->result to one of CODING_RESULT_XXX indicating
 199   how the decoding is finished.
 200
 201   Below is the template of these functions.  */
 202
 203 #if 0
 204 static void
 205 decode_coding_XXXX (coding)
 206      struct coding_system *coding;
 207 {
 208   unsigned char *src = coding->source + coding->consumed;
 209   unsigned char *src_end = coding->source + coding->src_bytes;
 210   /* SRC_BASE remembers the start position in source in each loop.
 211      The loop will be exited when there's not enough source code, or
 212      when there's no room in CHARBUF for a decoded character.  */
 213   unsigned char *src_base;
 214   /* A buffer to produce decoded characters.  */
 215   int *charbuf = coding->charbuf;
 216   int *charbuf_end = charbuf + coding->charbuf_size;
 217   int multibytep = coding->src_multibyte;
 218
 219   while (1)
 220     {
 221       src_base = src;
 222       if (charbuf < charbuf_end)
 223         /* No more room to produce a decoded character.  */
 224         break;
 225       ONE_MORE_BYTE (c);
 226       /* Decode it. */
 227     }
 228
 229  no_more_source:
 230   if (src_base < src_end
 231       && coding->mode & CODING_MODE_LAST_BLOCK)
 232     /* If the source ends by partial bytes to construct a character,
 233        treat them as eight-bit raw data.  */
 234     while (src_base < src_end && charbuf < charbuf_end)
 235       *charbuf++ = *src_base++;
 236   /* Remember how many bytes and characters we consumed.  If the
 237      source is multibyte, the bytes and chars are not identical.  */
 238   coding->consumed = coding->consumed_char = src_base - coding->source;
 239   /* Remember how many characters we produced.  */
 240   coding->charbuf_used = charbuf - coding->charbuf;
 241 }
 242 #endif
 243
 244 /*** GENERAL NOTES on `encode_coding_XXX ()' functions ***
 245
 246   These functions encode SRC_BYTES length text at SOURCE of Emacs'
 247   internal multibyte format by CODING.  The resulting byte sequence
 248   goes to a place pointed to by DESTINATION, the length of which
 249   should not exceed DST_BYTES.
 250
 251   These functions set the information of original and encoded texts in
 252   the members produced, produced_char, consumed, and consumed_char of
 253   the structure *CODING.  They also set the member result to one of
 254   CODING_RESULT_XXX indicating how the encoding finished.
 255
 256   DST_BYTES zero means that source area and destination area are
 257   overlapped, which means that we can produce a encoded text until it
 258   reaches at the head of not-yet-encoded source text.
 259
 260   Below is a template of these functions.  */
 261 #if 0
 262 static void
 263 encode_coding_XXX (coding)
 264      struct coding_system *coding;
 265 {
 266   int multibytep = coding->dst_multibyte;
 267   int *charbuf = coding->charbuf;
 268   int *charbuf_end = charbuf->charbuf + coding->charbuf_used;
 269   unsigned char *dst = coding->destination + coding->produced;
 270   unsigned char *dst_end = coding->destination + coding->dst_bytes;
 271   unsigned char *adjusted_dst_end = dst_end - _MAX_BYTES_PRODUCED_IN_LOOP_;
 272   int produced_chars = 0;
 273
 274   for (; charbuf < charbuf_end && dst < adjusted_dst_end; charbuf++)
 275     {
 276       int c = *charbuf;
 277       /* Encode C into DST, and increment DST.  */
 278     }
 279  label_no_more_destination:
 280   /* How many chars and bytes we produced.  */
 281   coding->produced_char += produced_chars;
 282   coding->produced = dst - coding->destination;
 283 }
 284 #endif
 285
 286 \f
 287 /*** 1. Preamble ***/
 288
 289 #include <config.h>
 290 #include <stdio.h>
 291
 292 #include "lisp.h"
 293 #include "buffer.h"
 294 #include "character.h"
 295 #include "charset.h"
 296 #include "ccl.h"
 297 #include "composite.h"
 298 #include "coding.h"
 299 #include "window.h"
 300
 301 Lisp_Object Vcoding_system_hash_table;
 302
 303 Lisp_Object Qcoding_system, Qcoding_aliases, Qeol_type;
 304 Lisp_Object Qunix, Qdos;
 305 extern Lisp_Object Qmac;        /* frame.c */
 306 Lisp_Object Qbuffer_file_coding_system;
 307 Lisp_Object Qpost_read_conversion, Qpre_write_conversion;
 308 Lisp_Object Qdefault_char;
 309 Lisp_Object Qno_conversion, Qundecided;
 310 Lisp_Object Qcharset, Qiso_2022, Qutf_8, Qutf_16, Qshift_jis, Qbig5;
 311 Lisp_Object Qbig, Qlittle;
 312 Lisp_Object Qcoding_system_history;
 313 Lisp_Object Qvalid_codes;
 314 Lisp_Object QCcategory, QCmnemonic, QCdefalut_char;
 315 Lisp_Object QCdecode_translation_table, QCencode_translation_table;
 316 Lisp_Object QCpost_read_conversion, QCpre_write_conversion;
 317
 318 extern Lisp_Object Qinsert_file_contents, Qwrite_region;
 319 Lisp_Object Qcall_process, Qcall_process_region, Qprocess_argument;
 320 Lisp_Object Qstart_process, Qopen_network_stream;
 321 Lisp_Object Qtarget_idx;
 322
 323 Lisp_Object Qinsufficient_source, Qinconsistent_eol, Qinvalid_source;
 324 Lisp_Object Qinterrupted, Qinsufficient_memory;
 325
 326 int coding_system_require_warning;
 327
 328 Lisp_Object Vselect_safe_coding_system_function;
 329
 330 /* Mnemonic string for each format of end-of-line.  */
 331 Lisp_Object eol_mnemonic_unix, eol_mnemonic_dos, eol_mnemonic_mac;
 332 /* Mnemonic string to indicate format of end-of-line is not yet
 333    decided.  */
 334 Lisp_Object eol_mnemonic_undecided;
 335
 336 #ifdef emacs
 337
 338 Lisp_Object Vcoding_system_list, Vcoding_system_alist;
 339
 340 Lisp_Object Qcoding_system_p, Qcoding_system_error;
 341
 342 /* Coding system emacs-mule and raw-text are for converting only
 343    end-of-line format.  */
 344 Lisp_Object Qemacs_mule, Qraw_text;
 345 Lisp_Object Qutf_8_emacs;
 346
 347 /* Coding-systems are handed between Emacs Lisp programs and C internal
 348    routines by the following three variables.  */
 349 /* Coding-system for reading files and receiving data from process.  */
 350 Lisp_Object Vcoding_system_for_read;
 351 /* Coding-system for writing files and sending data to process.  */
 352 Lisp_Object Vcoding_system_for_write;
 353 /* Coding-system actually used in the latest I/O.  */
 354 Lisp_Object Vlast_coding_system_used;
 355 /* Set to non-nil when an error is detected while code conversion.  */
 356 Lisp_Object Vlast_code_conversion_error;
 357 /* A vector of length 256 which contains information about special
 358    Latin codes (especially for dealing with Microsoft codes).  */
 359 Lisp_Object Vlatin_extra_code_table;
 360
 361 /* Flag to inhibit code conversion of end-of-line format.  */
 362 int inhibit_eol_conversion;
 363
 364 /* Flag to inhibit ISO2022 escape sequence detection.  */
 365 int inhibit_iso_escape_detection;
 366
 367 /* Flag to make buffer-file-coding-system inherit from process-coding.  */
 368 int inherit_process_coding_system;
 369
 370 /* Coding system to be used to encode text for terminal display.  */
 371 struct coding_system terminal_coding;
 372
 373 /* Coding system to be used to encode text for terminal display when
 374    terminal coding system is nil.  */
 375 struct coding_system safe_terminal_coding;
 376
 377 /* Coding system of what is sent from terminal keyboard.  */
 378 struct coding_system keyboard_coding;
 379
 380 Lisp_Object Vfile_coding_system_alist;
 381 Lisp_Object Vprocess_coding_system_alist;
 382 Lisp_Object Vnetwork_coding_system_alist;
 383
 384 Lisp_Object Vlocale_coding_system;
 385
 386 #endif /* emacs */
 387
 388 /* Flag to tell if we look up translation table on character code
 389    conversion.  */
 390 Lisp_Object Venable_character_translation;
 391 /* Standard translation table to look up on decoding (reading).  */
 392 Lisp_Object Vstandard_translation_table_for_decode;
 393 /* Standard translation table to look up on encoding (writing).  */
 394 Lisp_Object Vstandard_translation_table_for_encode;
 395
 396 Lisp_Object Qtranslation_table;
 397 Lisp_Object Qtranslation_table_id;
 398 Lisp_Object Qtranslation_table_for_decode;
 399 Lisp_Object Qtranslation_table_for_encode;
 400
 401 /* Alist of charsets vs revision number.  */
 402 static Lisp_Object Vcharset_revision_table;
 403
 404 /* Default coding systems used for process I/O.  */
 405 Lisp_Object Vdefault_process_coding_system;
 406
 407 /* Char table for translating Quail and self-inserting input.  */
 408 Lisp_Object Vtranslation_table_for_input;
 409
 410 /* Two special coding systems.  */
 411 Lisp_Object Vsjis_coding_system;
 412 Lisp_Object Vbig5_coding_system;
 413
 414 static void record_conversion_result (struct coding_system *coding,
 415                                       enum coding_result_code result);
 416 static int detect_coding_utf_8 P_ ((struct coding_system *,
 417                                     struct coding_detection_info *info));
 418 static void decode_coding_utf_8 P_ ((struct coding_system *));
 419 static int encode_coding_utf_8 P_ ((struct coding_system *));
 420
 421 static int detect_coding_utf_16 P_ ((struct coding_system *,
 422                                      struct coding_detection_info *info));
 423 static void decode_coding_utf_16 P_ ((struct coding_system *));
 424 static int encode_coding_utf_16 P_ ((struct coding_system *));
 425
 426 static int detect_coding_iso_2022 P_ ((struct coding_system *,
 427                                        struct coding_detection_info *info));
 428 static void decode_coding_iso_2022 P_ ((struct coding_system *));
 429 static int encode_coding_iso_2022 P_ ((struct coding_system *));
 430
 431 static int detect_coding_emacs_mule P_ ((struct coding_system *,
 432                                          struct coding_detection_info *info));
 433 static void decode_coding_emacs_mule P_ ((struct coding_system *));
 434 static int encode_coding_emacs_mule P_ ((struct coding_system *));
 435
 436 static int detect_coding_sjis P_ ((struct coding_system *,
 437                                    struct coding_detection_info *info));
 438 static void decode_coding_sjis P_ ((struct coding_system *));
 439 static int encode_coding_sjis P_ ((struct coding_system *));
 440
 441 static int detect_coding_big5 P_ ((struct coding_system *,
 442                                    struct coding_detection_info *info));
 443 static void decode_coding_big5 P_ ((struct coding_system *));
 444 static int encode_coding_big5 P_ ((struct coding_system *));
 445
 446 static int detect_coding_ccl P_ ((struct coding_system *,
 447                                   struct coding_detection_info *info));
 448 static void decode_coding_ccl P_ ((struct coding_system *));
 449 static int encode_coding_ccl P_ ((struct coding_system *));
 450
 451 static void decode_coding_raw_text P_ ((struct coding_system *));
 452 static int encode_coding_raw_text P_ ((struct coding_system *));
 453
 454
 455 /* ISO2022 section */
 456
 457 #define CODING_ISO_INITIAL(coding, reg)                 \
 458   (XINT (AREF (AREF (CODING_ID_ATTRS ((coding)->id),    \
 459                      coding_attr_iso_initial),          \
 460                reg)))
 461
 462
 463 #define CODING_ISO_REQUEST(coding, charset_id)  \
 464   ((charset_id <= (coding)->max_charset_id      \
 465     ? (coding)->safe_charsets[charset_id]       \
 466     : -1))
 467
 468
 469 #define CODING_ISO_FLAGS(coding)        \
 470   ((coding)->spec.iso_2022.flags)
 471 #define CODING_ISO_DESIGNATION(coding, reg)     \
 472   ((coding)->spec.iso_2022.current_designation[reg])
 473 #define CODING_ISO_INVOCATION(coding, plane)    \
 474   ((coding)->spec.iso_2022.current_invocation[plane])
 475 #define CODING_ISO_SINGLE_SHIFTING(coding)      \
 476   ((coding)->spec.iso_2022.single_shifting)
 477 #define CODING_ISO_BOL(coding)  \
 478   ((coding)->spec.iso_2022.bol)
 479 #define CODING_ISO_INVOKED_CHARSET(coding, plane)       \
 480   CODING_ISO_DESIGNATION ((coding), CODING_ISO_INVOCATION ((coding), (plane)))
 481
 482 /* Control characters of ISO2022.  */
 483                         /* code */      /* function */
 484 #define ISO_CODE_LF     0x0A            /* line-feed */
 485 #define ISO_CODE_CR     0x0D            /* carriage-return */
 486 #define ISO_CODE_SO     0x0E            /* shift-out */
 487 #define ISO_CODE_SI     0x0F            /* shift-in */
 488 #define ISO_CODE_SS2_7  0x19            /* single-shift-2 for 7-bit code */
 489 #define ISO_CODE_ESC    0x1B            /* escape */
 490 #define ISO_CODE_SS2    0x8E            /* single-shift-2 */
 491 #define ISO_CODE_SS3    0x8F            /* single-shift-3 */
 492 #define ISO_CODE_CSI    0x9B            /* control-sequence-introducer */
 493
 494 /* All code (1-byte) of ISO2022 is classified into one of the
 495    followings.  */
 496 enum iso_code_class_type
 497   {
 498     ISO_control_0,              /* Control codes in the range
 499                                    0x00..0x1F and 0x7F, except for the
 500                                    following 5 codes.  */
 501     ISO_shift_out,              /* ISO_CODE_SO (0x0E) */
 502     ISO_shift_in,               /* ISO_CODE_SI (0x0F) */
 503     ISO_single_shift_2_7,       /* ISO_CODE_SS2_7 (0x19) */
 504     ISO_escape,                 /* ISO_CODE_SO (0x1B) */
 505     ISO_control_1,              /* Control codes in the range
 506                                    0x80..0x9F, except for the
 507                                    following 3 codes.  */
 508     ISO_single_shift_2,         /* ISO_CODE_SS2 (0x8E) */
 509     ISO_single_shift_3,         /* ISO_CODE_SS3 (0x8F) */
 510     ISO_control_sequence_introducer, /* ISO_CODE_CSI (0x9B) */
 511     ISO_0x20_or_0x7F,           /* Codes of the values 0x20 or 0x7F.  */
 512     ISO_graphic_plane_0,        /* Graphic codes in the range 0x21..0x7E.  */
 513     ISO_0xA0_or_0xFF,           /* Codes of the values 0xA0 or 0xFF.  */
 514     ISO_graphic_plane_1         /* Graphic codes in the range 0xA1..0xFE.  */
 515   };
 516
 517 /** The macros CODING_ISO_FLAG_XXX defines a flag bit of the
 518     `iso-flags' attribute of an iso2022 coding system.  */
 519
 520 /* If set, produce long-form designation sequence (e.g. ESC $ ( A)
 521    instead of the correct short-form sequence (e.g. ESC $ A).  */
 522 #define CODING_ISO_FLAG_LONG_FORM       0x0001
 523
 524 /* If set, reset graphic planes and registers at end-of-line to the
 525    initial state.  */
 526 #define CODING_ISO_FLAG_RESET_AT_EOL    0x0002
 527
 528 /* If set, reset graphic planes and registers before any control
 529    characters to the initial state.  */
 530 #define CODING_ISO_FLAG_RESET_AT_CNTL   0x0004
 531
 532 /* If set, encode by 7-bit environment.  */
 533 #define CODING_ISO_FLAG_SEVEN_BITS      0x0008
 534
 535 /* If set, use locking-shift function.  */
 536 #define CODING_ISO_FLAG_LOCKING_SHIFT   0x0010
 537
 538 /* If set, use single-shift function.  Overwrite
 539    CODING_ISO_FLAG_LOCKING_SHIFT.  */
 540 #define CODING_ISO_FLAG_SINGLE_SHIFT    0x0020
 541
 542 /* If set, use designation escape sequence.  */
 543 #define CODING_ISO_FLAG_DESIGNATION     0x0040
 544
 545 /* If set, produce revision number sequence.  */
 546 #define CODING_ISO_FLAG_REVISION        0x0080
 547
 548 /* If set, produce ISO6429's direction specifying sequence.  */
 549 #define CODING_ISO_FLAG_DIRECTION       0x0100
 550
 551 /* If set, assume designation states are reset at beginning of line on
 552    output.  */
 553 #define CODING_ISO_FLAG_INIT_AT_BOL     0x0200
 554
 555 /* If set, designation sequence should be placed at beginning of line
 556    on output.  */
 557 #define CODING_ISO_FLAG_DESIGNATE_AT_BOL 0x0400
 558
 559 /* If set, do not encode unsafe charactes on output.  */
 560 #define CODING_ISO_FLAG_SAFE            0x0800
 561
 562 /* If set, extra latin codes (128..159) are accepted as a valid code
 563    on input.  */
 564 #define CODING_ISO_FLAG_LATIN_EXTRA     0x1000
 565
 566 #define CODING_ISO_FLAG_COMPOSITION     0x2000
 567
 568 #define CODING_ISO_FLAG_EUC_TW_SHIFT    0x4000
 569
 570 #define CODING_ISO_FLAG_USE_ROMAN       0x8000
 571
 572 #define CODING_ISO_FLAG_USE_OLDJIS      0x10000
 573
 574 #define CODING_ISO_FLAG_FULL_SUPPORT    0x100000
 575
 576 /* A character to be produced on output if encoding of the original
 577    character is prohibited by CODING_ISO_FLAG_SAFE.  */
 578 #define CODING_INHIBIT_CHARACTER_SUBSTITUTION  '?'
 579
 580
 581 /* UTF-16 section */
 582 #define CODING_UTF_16_BOM(coding)       \
 583   ((coding)->spec.utf_16.bom)
 584
 585 #define CODING_UTF_16_ENDIAN(coding)    \
 586   ((coding)->spec.utf_16.endian)
 587
 588 #define CODING_UTF_16_SURROGATE(coding) \
 589   ((coding)->spec.utf_16.surrogate)
 590
 591
 592 /* CCL section */
 593 #define CODING_CCL_DECODER(coding)      \
 594   AREF (CODING_ID_ATTRS ((coding)->id), coding_attr_ccl_decoder)
 595 #define CODING_CCL_ENCODER(coding)      \
 596   AREF (CODING_ID_ATTRS ((coding)->id), coding_attr_ccl_encoder)
 597 #define CODING_CCL_VALIDS(coding)                                          \
 598   (SDATA (AREF (CODING_ID_ATTRS ((coding)->id), coding_attr_ccl_valids)))
 599
 600 /* Index for each coding category in `coding_categories' */
 601
 602 enum coding_category
 603   {
 604     coding_category_iso_7,
 605     coding_category_iso_7_tight,
 606     coding_category_iso_8_1,
 607     coding_category_iso_8_2,
 608     coding_category_iso_7_else,
 609     coding_category_iso_8_else,
 610     coding_category_utf_8,
 611     coding_category_utf_16_auto,
 612     coding_category_utf_16_be,
 613     coding_category_utf_16_le,
 614     coding_category_utf_16_be_nosig,
 615     coding_category_utf_16_le_nosig,
 616     coding_category_charset,
 617     coding_category_sjis,
 618     coding_category_big5,
 619     coding_category_ccl,
 620     coding_category_emacs_mule,
 621     /* All above are targets of code detection.  */
 622     coding_category_raw_text,
 623     coding_category_undecided,
 624     coding_category_max
 625   };
 626
 627 /* Definitions of flag bits used in detect_coding_XXXX.  */
 628 #define CATEGORY_MASK_ISO_7             (1 << coding_category_iso_7)
 629 #define CATEGORY_MASK_ISO_7_TIGHT       (1 << coding_category_iso_7_tight)
 630 #define CATEGORY_MASK_ISO_8_1           (1 << coding_category_iso_8_1)
 631 #define CATEGORY_MASK_ISO_8_2           (1 << coding_category_iso_8_2)
 632 #define CATEGORY_MASK_ISO_7_ELSE        (1 << coding_category_iso_7_else)
 633 #define CATEGORY_MASK_ISO_8_ELSE        (1 << coding_category_iso_8_else)
 634 #define CATEGORY_MASK_UTF_8             (1 << coding_category_utf_8)
 635 #define CATEGORY_MASK_UTF_16_AUTO       (1 << coding_category_utf_16_auto)
 636 #define CATEGORY_MASK_UTF_16_BE         (1 << coding_category_utf_16_be)
 637 #define CATEGORY_MASK_UTF_16_LE         (1 << coding_category_utf_16_le)
 638 #define CATEGORY_MASK_UTF_16_BE_NOSIG   (1 << coding_category_utf_16_be_nosig)
 639 #define CATEGORY_MASK_UTF_16_LE_NOSIG   (1 << coding_category_utf_16_le_nosig)
 640 #define CATEGORY_MASK_CHARSET           (1 << coding_category_charset)
 641 #define CATEGORY_MASK_SJIS              (1 << coding_category_sjis)
 642 #define CATEGORY_MASK_BIG5              (1 << coding_category_big5)
 643 #define CATEGORY_MASK_CCL               (1 << coding_category_ccl)
 644 #define CATEGORY_MASK_EMACS_MULE        (1 << coding_category_emacs_mule)
 645 #define CATEGORY_MASK_RAW_TEXT          (1 << coding_category_raw_text)
 646
 647 /* This value is returned if detect_coding_mask () find nothing other
 648    than ASCII characters.  */
 649 #define CATEGORY_MASK_ANY               \
 650   (CATEGORY_MASK_ISO_7                  \
 651    | CATEGORY_MASK_ISO_7_TIGHT          \
 652    | CATEGORY_MASK_ISO_8_1              \
 653    | CATEGORY_MASK_ISO_8_2              \
 654    | CATEGORY_MASK_ISO_7_ELSE           \
 655    | CATEGORY_MASK_ISO_8_ELSE           \
 656    | CATEGORY_MASK_UTF_8                \
 657    | CATEGORY_MASK_UTF_16_BE            \
 658    | CATEGORY_MASK_UTF_16_LE            \
 659    | CATEGORY_MASK_UTF_16_BE_NOSIG      \
 660    | CATEGORY_MASK_UTF_16_LE_NOSIG      \
 661    | CATEGORY_MASK_CHARSET              \
 662    | CATEGORY_MASK_SJIS                 \
 663    | CATEGORY_MASK_BIG5                 \
 664    | CATEGORY_MASK_CCL                  \
 665    | CATEGORY_MASK_EMACS_MULE)
 666
 667
 668 #define CATEGORY_MASK_ISO_7BIT \
 669   (CATEGORY_MASK_ISO_7 | CATEGORY_MASK_ISO_7_TIGHT)
 670
 671 #define CATEGORY_MASK_ISO_8BIT \
 672   (CATEGORY_MASK_ISO_8_1 | CATEGORY_MASK_ISO_8_2)
 673
 674 #define CATEGORY_MASK_ISO_ELSE \
 675   (CATEGORY_MASK_ISO_7_ELSE | CATEGORY_MASK_ISO_8_ELSE)
 676
 677 #define CATEGORY_MASK_ISO_ESCAPE        \
 678   (CATEGORY_MASK_ISO_7                  \
 679    | CATEGORY_MASK_ISO_7_TIGHT          \
 680    | CATEGORY_MASK_ISO_7_ELSE           \
 681    | CATEGORY_MASK_ISO_8_ELSE)
 682
 683 #define CATEGORY_MASK_ISO       \
 684   (  CATEGORY_MASK_ISO_7BIT     \
 685      | CATEGORY_MASK_ISO_8BIT   \
 686      | CATEGORY_MASK_ISO_ELSE)
 687
 688 #define CATEGORY_MASK_UTF_16            \
 689   (CATEGORY_MASK_UTF_16_BE              \
 690    | CATEGORY_MASK_UTF_16_LE            \
 691    | CATEGORY_MASK_UTF_16_BE_NOSIG      \
 692    | CATEGORY_MASK_UTF_16_LE_NOSIG)
 693
 694
 695 /* List of symbols `coding-category-xxx' ordered by priority.  This
 696    variable is exposed to Emacs Lisp.  */
 697 static Lisp_Object Vcoding_category_list;
 698
 699 /* Table of coding categories (Lisp symbols).  This variable is for
 700    internal use oly.  */
 701 static Lisp_Object Vcoding_category_table;
 702
 703 /* Table of coding-categories ordered by priority.  */
 704 static enum coding_category coding_priorities[coding_category_max];
 705
 706 /* Nth element is a coding context for the coding system bound to the
 707    Nth coding category.  */
 708 static struct coding_system coding_categories[coding_category_max];
 709
 710 /*** Commonly used macros and functions ***/
 711
 712 #ifndef min
 713 #define min(a, b) ((a) < (b) ? (a) : (b))
 714 #endif
 715 #ifndef max
 716 #define max(a, b) ((a) > (b) ? (a) : (b))
 717 #endif
 718
 719 #define CODING_GET_INFO(coding, attrs, charset_list)    \
 720   do {                                                  \
 721     (attrs) = CODING_ID_ATTRS ((coding)->id);           \
 722     (charset_list) = CODING_ATTR_CHARSET_LIST (attrs);  \
 723   } while (0)
 724
 725
 726 /* Safely get one byte from the source text pointed by SRC which ends
 727    at SRC_END, and set C to that byte.  If there are not enough bytes
 728    in the source, it jumps to `no_more_source'.  If multibytep is
 729    nonzero, and a multibyte character is found at SRC, set C to the
 730    negative value of the character code.  The caller should declare
 731    and set these variables appropriately in advance:
 732         src, src_end, multibytep */
 733
 734 #define ONE_MORE_BYTE(c)                                \
 735   do {                                                  \
 736     if (src == src_end)                                 \
 737       {                                                 \
 738         if (src_base < src)                             \
 739           record_conversion_result                      \
 740             (coding, CODING_RESULT_INSUFFICIENT_SRC);   \
 741         goto no_more_source;                            \
 742       }                                                 \
 743     c = *src++;                                         \
 744     if (multibytep && (c & 0x80))                       \
 745       {                                                 \
 746         if ((c & 0xFE) == 0xC0)                         \
 747           c = ((c & 1) << 6) | *src++;                  \
 748         else                                            \
 749           {                                             \
 750             c = - string_char (--src, &src, NULL);      \
 751             record_conversion_result                    \
 752               (coding, CODING_RESULT_INVALID_SRC);      \
 753           }                                             \
 754       }                                                 \
 755     consumed_chars++;                                   \
 756   } while (0)
 757
 758
 759 #define ONE_MORE_BYTE_NO_CHECK(c)                       \
 760   do {                                                  \
 761     c = *src++;                                         \
 762     if (multibytep && (c & 0x80))                       \
 763       {                                                 \
 764         if ((c & 0xFE) == 0xC0)                         \
 765           c = ((c & 1) << 6) | *src++;                  \
 766         else                                            \
 767           {                                             \
 768             c = - string_char (--src, &src, NULL);      \
 769             record_conversion_result                    \
 770               (coding, CODING_RESULT_INVALID_SRC);      \
 771           }                                             \
 772       }                                                 \
 773     consumed_chars++;                                   \
 774   } while (0)
 775
 776
 777 /* Store a byte C in the place pointed by DST and increment DST to the
 778    next free point, and increment PRODUCED_CHARS.  The caller should
 779    assure that C is 0..127, and declare and set the variable `dst'
 780    appropriately in advance.
 781 */
 782
 783
 784 #define EMIT_ONE_ASCII_BYTE(c)  \
 785   do {                          \
 786     produced_chars++;           \
 787     *dst++ = (c);               \
 788   } while (0)
 789
 790
 791 /* Like EMIT_ONE_ASCII_BYTE byt store two bytes; C1 and C2.  */
 792
 793 #define EMIT_TWO_ASCII_BYTES(c1, c2)    \
 794   do {                                  \
 795     produced_chars += 2;                \
 796     *dst++ = (c1), *dst++ = (c2);       \
 797   } while (0)
 798
 799
 800 /* Store a byte C in the place pointed by DST and increment DST to the
 801    next free point, and increment PRODUCED_CHARS.  If MULTIBYTEP is
 802    nonzero, store in an appropriate multibyte from.  The caller should
 803    declare and set the variables `dst' and `multibytep' appropriately
 804    in advance.  */
 805
 806 #define EMIT_ONE_BYTE(c)                \
 807   do {                                  \
 808     produced_chars++;                   \
 809     if (multibytep)                     \
 810       {                                 \
 811         int ch = (c);                   \
 812         if (ch >= 0x80)                 \
 813           ch = BYTE8_TO_CHAR (ch);      \
 814         CHAR_STRING_ADVANCE (ch, dst);  \
 815       }                                 \
 816     else                                \
 817       *dst++ = (c);                     \
 818   } while (0)
 819
 820
 821 /* Like EMIT_ONE_BYTE, but emit two bytes; C1 and C2.  */
 822
 823 #define EMIT_TWO_BYTES(c1, c2)          \
 824   do {                                  \
 825     produced_chars += 2;                \
 826     if (multibytep)                     \
 827       {                                 \
 828         int ch;                         \
 829                                         \
 830         ch = (c1);                      \
 831         if (ch >= 0x80)                 \
 832           ch = BYTE8_TO_CHAR (ch);      \
 833         CHAR_STRING_ADVANCE (ch, dst);  \
 834         ch = (c2);                      \
 835         if (ch >= 0x80)                 \
 836           ch = BYTE8_TO_CHAR (ch);      \
 837         CHAR_STRING_ADVANCE (ch, dst);  \
 838       }                                 \
 839     else                                \
 840       {                                 \
 841         *dst++ = (c1);                  \
 842         *dst++ = (c2);                  \
 843       }                                 \
 844   } while (0)
 845
 846
 847 #define EMIT_THREE_BYTES(c1, c2, c3)    \
 848   do {                                  \
 849     EMIT_ONE_BYTE (c1);                 \
 850     EMIT_TWO_BYTES (c2, c3);            \
 851   } while (0)
 852
 853
 854 #define EMIT_FOUR_BYTES(c1, c2, c3, c4)         \
 855   do {                                          \
 856     EMIT_TWO_BYTES (c1, c2);                    \
 857     EMIT_TWO_BYTES (c3, c4);                    \
 858   } while (0)
 859
 860
 861 static void
 862 record_conversion_result (struct coding_system *coding,
 863                           enum coding_result_code result)
 864 {
 865   coding->result = result;
 866   switch (result)
 867     {
 868     case CODING_RESULT_INSUFFICIENT_SRC:
 869       Vlast_code_conversion_error = Qinsufficient_source;
 870       break;
 871     case CODING_RESULT_INCONSISTENT_EOL:
 872       Vlast_code_conversion_error = Qinconsistent_eol;
 873       break;
 874     case CODING_RESULT_INVALID_SRC:
 875       Vlast_code_conversion_error = Qinvalid_source;
 876       break;
 877     case CODING_RESULT_INTERRUPT:
 878       Vlast_code_conversion_error = Qinterrupted;
 879       break;
 880     case CODING_RESULT_INSUFFICIENT_MEM:
 881       Vlast_code_conversion_error = Qinsufficient_memory;
 882       break;
 883     }
 884 }
 885
 886 #define CODING_DECODE_CHAR(coding, src, src_base, src_end, charset, code, c) \
 887   do {                                                                       \
 888     charset_map_loaded = 0;                                                  \
 889     c = DECODE_CHAR (charset, code);                                         \
 890     if (charset_map_loaded)                                                  \
 891       {                                                                      \
 892         const unsigned char *orig = coding->source;                          \
 893         EMACS_INT offset;                                                    \
 894                                                                              \
 895         coding_set_source (coding);                                          \
 896         offset = coding->source - orig;                                      \
 897         src += offset;                                                       \
 898         src_base += offset;                                                  \
 899         src_end += offset;                                                   \
 900       }                                                                      \
 901   } while (0)
 902
 903
 904 #define ASSURE_DESTINATION(bytes)                               \
 905   do {                                                          \
 906     if (dst + (bytes) >= dst_end)                               \
 907       {                                                         \
 908         int more_bytes = charbuf_end - charbuf + (bytes);       \
 909                                                                 \
 910         dst = alloc_destination (coding, more_bytes, dst);      \
 911         dst_end = coding->destination + coding->dst_bytes;      \
 912       }                                                         \
 913   } while (0)
 914
 915
 916
 917 static void
 918 coding_set_source (coding)
 919      struct coding_system *coding;
 920 {
 921   if (BUFFERP (coding->src_object))
 922     {
 923       struct buffer *buf = XBUFFER (coding->src_object);
 924
 925       if (coding->src_pos < 0)
 926         coding->source = BUF_GAP_END_ADDR (buf) + coding->src_pos_byte;
 927       else
 928         coding->source = BUF_BYTE_ADDRESS (buf, coding->src_pos_byte);
 929     }
 930   else if (STRINGP (coding->src_object))
 931     {
 932       coding->source = SDATA (coding->src_object) + coding->src_pos_byte;
 933     }
 934   else
 935     /* Otherwise, the source is C string and is never relocated
 936        automatically.  Thus we don't have to update anything.  */
 937     ;
 938 }
 939
 940 static void
 941 coding_set_destination (coding)
 942      struct coding_system *coding;
 943 {
 944   if (BUFFERP (coding->dst_object))
 945     {
 946       if (coding->src_pos < 0)
 947         {
 948           coding->destination = BEG_ADDR + coding->dst_pos_byte - 1;
 949           coding->dst_bytes = (GAP_END_ADDR
 950                                - (coding->src_bytes - coding->consumed)
 951                                - coding->destination);
 952         }
 953       else
 954         {
 955           /* We are sure that coding->dst_pos_byte is before the gap
 956              of the buffer. */
 957           coding->destination = (BUF_BEG_ADDR (XBUFFER (coding->dst_object))
 958                                  + coding->dst_pos_byte - 1);
 959           coding->dst_bytes = (BUF_GAP_END_ADDR (XBUFFER (coding->dst_object))
 960                                - coding->destination);
 961         }
 962     }
 963   else
 964     /* Otherwise, the destination is C string and is never relocated
 965        automatically.  Thus we don't have to update anything.  */
 966     ;
 967 }
 968
 969
 970 static void
 971 coding_alloc_by_realloc (coding, bytes)
 972      struct coding_system *coding;
 973      EMACS_INT bytes;
 974 {
 975   coding->destination = (unsigned char *) xrealloc (coding->destination,
 976                                                     coding->dst_bytes + bytes);
 977   coding->dst_bytes += bytes;
 978 }
 979
 980 static void
 981 coding_alloc_by_making_gap (coding, bytes)
 982      struct coding_system *coding;
 983      EMACS_INT bytes;
 984 {
 985   if (BUFFERP (coding->dst_object)
 986       && EQ (coding->src_object, coding->dst_object))
 987     {
 988       EMACS_INT add = coding->src_bytes - coding->consumed;
 989
 990       GAP_SIZE -= add; ZV += add; Z += add; ZV_BYTE += add; Z_BYTE += add;
 991       make_gap (bytes);
 992       GAP_SIZE += add; ZV -= add; Z -= add; ZV_BYTE -= add; Z_BYTE -= add;
 993     }
 994   else
 995     {
 996       Lisp_Object this_buffer;
 997
 998       this_buffer = Fcurrent_buffer ();
 999       set_buffer_internal (XBUFFER (coding->dst_object));
1000       make_gap (bytes);
1001       set_buffer_internal (XBUFFER (this_buffer));
1002     }
1003 }
1004
1005
1006 static unsigned char *
1007 alloc_destination (coding, nbytes, dst)
1008      struct coding_system *coding;
1009      EMACS_INT nbytes;
1010      unsigned char *dst;
1011 {
1012   EMACS_INT offset = dst - coding->destination;
1013
1014   if (BUFFERP (coding->dst_object))
1015     coding_alloc_by_making_gap (coding, nbytes);
1016   else
1017     coding_alloc_by_realloc (coding, nbytes);
1018   record_conversion_result (coding, CODING_RESULT_SUCCESS);
1019   coding_set_destination (coding);
1020   dst = coding->destination + offset;
1021   return dst;
1022 }
1023
1024 /** Macros for annotations.  */
1025
1026 /* Maximum length of annotation data (sum of annotations for
1027    composition and charset).  */
1028 #define MAX_ANNOTATION_LENGTH (5 + (MAX_COMPOSITION_COMPONENTS * 2) - 1 + 5)
1029
1030 /* An annotation data is stored in the array coding->charbuf in this
1031    format:
1032      [ -LENGTH ANNOTATION_MASK FROM TO ... ]
1033    LENGTH is the number of elements in the annotation.
1034    ANNOTATION_MASK is one of CODING_ANNOTATE_XXX_MASK.
1035    FROM and TO specify the range of text annotated.  They are relative
1036    to coding->src_pos (on encoding) or coding->dst_pos (on decoding).
1037
1038    The format of the following elements depend on ANNOTATION_MASK.
1039
1040    In the case of CODING_ANNOTATE_COMPOSITION_MASK, these elements
1041    follows:
1042      ... METHOD [ COMPOSITION-COMPONENTS ... ]
1043    METHOD is one of enum composition_method.
1044    Optionnal COMPOSITION-COMPONENTS are characters and composition
1045    rules.
1046
1047    In the case of CODING_ANNOTATE_CHARSET_MASK, one element CHARSET-ID
1048    follows.  */
1049
1050 #define ADD_ANNOTATION_DATA(buf, len, mask, from, to)   \
1051   do {                                                  \
1052     *(buf)++ = -(len);                                  \
1053     *(buf)++ = (mask);                                  \
1054     *(buf)++ = (from);                                  \
1055     *(buf)++ = (to);                                    \
1056     coding->annotated = 1;                              \
1057   } while (0);
1058
1059 #define ADD_COMPOSITION_DATA(buf, from, to, method)                           \
1060   do {                                                                        \
1061     ADD_ANNOTATION_DATA (buf, 5, CODING_ANNOTATE_COMPOSITION_MASK, from, to); \
1062     *buf++ = method;                                                          \
1063   } while (0)
1064
1065
1066 #define ADD_CHARSET_DATA(buf, from, to, id)                               \
1067   do {                                                                    \
1068     ADD_ANNOTATION_DATA (buf, 5, CODING_ANNOTATE_CHARSET_MASK, from, to); \
1069     *buf++ = id;                                                          \
1070   } while (0)
1071
1072 \f
1073 /*** 2. Emacs' internal format (emacs-utf-8) ***/
1074
1075
1076
1077 \f
1078 /*** 3. UTF-8 ***/
1079
1080 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
1081    Check if a text is encoded in UTF-8.  If it is, return 1, else
1082    return 0.  */
1083
1084 #define UTF_8_1_OCTET_P(c)         ((c) < 0x80)
1085 #define UTF_8_EXTRA_OCTET_P(c)     (((c) & 0xC0) == 0x80)
1086 #define UTF_8_2_OCTET_LEADING_P(c) (((c) & 0xE0) == 0xC0)
1087 #define UTF_8_3_OCTET_LEADING_P(c) (((c) & 0xF0) == 0xE0)
1088 #define UTF_8_4_OCTET_LEADING_P(c) (((c) & 0xF8) == 0xF0)
1089 #define UTF_8_5_OCTET_LEADING_P(c) (((c) & 0xFC) == 0xF8)
1090
1091 static int
1092 detect_coding_utf_8 (coding, detect_info)
1093      struct coding_system *coding;
1094      struct coding_detection_info *detect_info;
1095 {
1096   const unsigned char *src = coding->source, *src_base;
1097   const unsigned char *src_end = coding->source + coding->src_bytes;
1098   int multibytep = coding->src_multibyte;
1099   int consumed_chars = 0;
1100   int found = 0;
1101
1102   detect_info->checked |= CATEGORY_MASK_UTF_8;
1103   /* A coding system of this category is always ASCII compatible.  */
1104   src += coding->head_ascii;
1105
1106   while (1)
1107     {
1108       int c, c1, c2, c3, c4;
1109
1110       src_base = src;
1111       ONE_MORE_BYTE (c);
1112       if (c < 0 || UTF_8_1_OCTET_P (c))
1113         continue;
1114       ONE_MORE_BYTE (c1);
1115       if (c1 < 0 || ! UTF_8_EXTRA_OCTET_P (c1))
1116         break;
1117       if (UTF_8_2_OCTET_LEADING_P (c))
1118         {
1119           found = CATEGORY_MASK_UTF_8;
1120           continue;
1121         }
1122       ONE_MORE_BYTE (c2);
1123       if (c2 < 0 || ! UTF_8_EXTRA_OCTET_P (c2))
1124         break;
1125       if (UTF_8_3_OCTET_LEADING_P (c))
1126         {
1127           found = CATEGORY_MASK_UTF_8;
1128           continue;
1129         }
1130       ONE_MORE_BYTE (c3);
1131       if (c3 < 0 || ! UTF_8_EXTRA_OCTET_P (c3))
1132         break;
1133       if (UTF_8_4_OCTET_LEADING_P (c))
1134         {
1135           found = CATEGORY_MASK_UTF_8;
1136           continue;
1137         }
1138       ONE_MORE_BYTE (c4);
1139       if (c4 < 0 || ! UTF_8_EXTRA_OCTET_P (c4))
1140         break;
1141       if (UTF_8_5_OCTET_LEADING_P (c))
1142         {
1143           found = CATEGORY_MASK_UTF_8;
1144           continue;
1145         }
1146       break;
1147     }
1148   detect_info->rejected |= CATEGORY_MASK_UTF_8;
1149   return 0;
1150
1151  no_more_source:
1152   if (src_base < src && coding->mode & CODING_MODE_LAST_BLOCK)
1153     {
1154       detect_info->rejected |= CATEGORY_MASK_UTF_8;
1155       return 0;
1156     }
1157   detect_info->found |= found;
1158   return 1;
1159 }
1160
1161
1162 static void
1163 decode_coding_utf_8 (coding)
1164      struct coding_system *coding;
1165 {
1166   const unsigned char *src = coding->source + coding->consumed;
1167   const unsigned char *src_end = coding->source + coding->src_bytes;
1168   const unsigned char *src_base;
1169   int *charbuf = coding->charbuf;
1170   int *charbuf_end = charbuf + coding->charbuf_size;
1171   int consumed_chars = 0, consumed_chars_base;
1172   int multibytep = coding->src_multibyte;
1173   Lisp_Object attr, charset_list;
1174
1175   CODING_GET_INFO (coding, attr, charset_list);
1176
1177   while (1)
1178     {
1179       int c, c1, c2, c3, c4, c5;
1180
1181       src_base = src;
1182       consumed_chars_base = consumed_chars;
1183
1184       if (charbuf >= charbuf_end)
1185         break;
1186
1187       ONE_MORE_BYTE (c1);
1188       if (c1 < 0)
1189         {
1190           c = - c1;
1191         }
1192       else if (UTF_8_1_OCTET_P(c1))
1193         {
1194           c = c1;
1195         }
1196       else
1197         {
1198           ONE_MORE_BYTE (c2);
1199           if (c2 < 0 || ! UTF_8_EXTRA_OCTET_P (c2))
1200             goto invalid_code;
1201           if (UTF_8_2_OCTET_LEADING_P (c1))
1202             {
1203               c = ((c1 & 0x1F) << 6) | (c2 & 0x3F);
1204               /* Reject overlong sequences here and below.  Encoders
1205                  producing them are incorrect, they can be misleading,
1206                  and they mess up read/write invariance.  */
1207               if (c < 128)
1208                 goto invalid_code;
1209             }
1210           else
1211             {
1212               ONE_MORE_BYTE (c3);
1213               if (c3 < 0 || ! UTF_8_EXTRA_OCTET_P (c3))
1214                 goto invalid_code;
1215               if (UTF_8_3_OCTET_LEADING_P (c1))
1216                 {
1217                   c = (((c1 & 0xF) << 12)
1218                        | ((c2 & 0x3F) << 6) | (c3 & 0x3F));
1219                   if (c < 0x800
1220                       || (c >= 0xd800 && c < 0xe000)) /* surrogates (invalid) */
1221                     goto invalid_code;
1222                 }
1223               else
1224                 {
1225                   ONE_MORE_BYTE (c4);
1226                   if (c4 < 0 || ! UTF_8_EXTRA_OCTET_P (c4))
1227                     goto invalid_code;
1228                   if (UTF_8_4_OCTET_LEADING_P (c1))
1229                     {
1230                     c = (((c1 & 0x7) << 18) | ((c2 & 0x3F) << 12)
1231                          | ((c3 & 0x3F) << 6) | (c4 & 0x3F));
1232                     if (c < 0x10000)
1233                       goto invalid_code;
1234                     }
1235                   else
1236                     {
1237                       ONE_MORE_BYTE (c5);
1238                       if (c5 < 0 || ! UTF_8_EXTRA_OCTET_P (c5))
1239                         goto invalid_code;
1240                       if (UTF_8_5_OCTET_LEADING_P (c1))
1241                         {
1242                           c = (((c1 & 0x3) << 24) | ((c2 & 0x3F) << 18)
1243                                | ((c3 & 0x3F) << 12) | ((c4 & 0x3F) << 6)
1244                                | (c5 & 0x3F));
1245                           if ((c > MAX_CHAR) || (c < 0x200000))
1246                             goto invalid_code;
1247                         }
1248                       else
1249                         goto invalid_code;
1250                     }
1251                 }
1252             }
1253         }
1254
1255       *charbuf++ = c;
1256       continue;
1257
1258     invalid_code:
1259       src = src_base;
1260       consumed_chars = consumed_chars_base;
1261       ONE_MORE_BYTE (c);
1262       *charbuf++ = ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
1263       coding->errors++;
1264     }
1265
1266  no_more_source:
1267   coding->consumed_char += consumed_chars_base;
1268   coding->consumed = src_base - coding->source;
1269   coding->charbuf_used = charbuf - coding->charbuf;
1270 }
1271
1272
1273 static int
1274 encode_coding_utf_8 (coding)
1275      struct coding_system *coding;
1276 {
1277   int multibytep = coding->dst_multibyte;
1278   int *charbuf = coding->charbuf;
1279   int *charbuf_end = charbuf + coding->charbuf_used;
1280   unsigned char *dst = coding->destination + coding->produced;
1281   unsigned char *dst_end = coding->destination + coding->dst_bytes;
1282   int produced_chars = 0;
1283   int c;
1284
1285   if (multibytep)
1286     {
1287       int safe_room = MAX_MULTIBYTE_LENGTH * 2;
1288
1289       while (charbuf < charbuf_end)
1290         {
1291           unsigned char str[MAX_MULTIBYTE_LENGTH], *p, *pend = str;
1292
1293           ASSURE_DESTINATION (safe_room);
1294           c = *charbuf++;
1295           if (CHAR_BYTE8_P (c))
1296             {
1297               c = CHAR_TO_BYTE8 (c);
1298               EMIT_ONE_BYTE (c);
1299             }
1300           else
1301             {
1302               CHAR_STRING_ADVANCE (c, pend);
1303               for (p = str; p < pend; p++)
1304                 EMIT_ONE_BYTE (*p);
1305             }
1306         }
1307     }
1308   else
1309     {
1310       int safe_room = MAX_MULTIBYTE_LENGTH;
1311
1312       while (charbuf < charbuf_end)
1313         {
1314           ASSURE_DESTINATION (safe_room);
1315           c = *charbuf++;
1316           dst += CHAR_STRING (c, dst);
1317           produced_chars++;
1318         }
1319     }
1320   record_conversion_result (coding, CODING_RESULT_SUCCESS);
1321   coding->produced_char += produced_chars;
1322   coding->produced = dst - coding->destination;
1323   return 0;
1324 }
1325
1326
1327 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
1328    Check if a text is encoded in one of UTF-16 based coding systems.
1329    If it is, return 1, else return 0.  */
1330
1331 #define UTF_16_HIGH_SURROGATE_P(val) \
1332   (((val) & 0xFC00) == 0xD800)
1333
1334 #define UTF_16_LOW_SURROGATE_P(val) \
1335   (((val) & 0xFC00) == 0xDC00)
1336
1337 #define UTF_16_INVALID_P(val)   \
1338   (((val) == 0xFFFE)            \
1339    || ((val) == 0xFFFF)         \
1340    || UTF_16_LOW_SURROGATE_P (val))
1341
1342
1343 static int
1344 detect_coding_utf_16 (coding, detect_info)
1345      struct coding_system *coding;
1346      struct coding_detection_info *detect_info;
1347 {
1348   const unsigned char *src = coding->source, *src_base = src;
1349   const unsigned char *src_end = coding->source + coding->src_bytes;
1350   int multibytep = coding->src_multibyte;
1351   int consumed_chars = 0;
1352   int c1, c2;
1353
1354   detect_info->checked |= CATEGORY_MASK_UTF_16;
1355   if (coding->mode & CODING_MODE_LAST_BLOCK
1356       && (coding->src_chars & 1))
1357     {
1358       detect_info->rejected |= CATEGORY_MASK_UTF_16;
1359       return 0;
1360     }
1361
1362   ONE_MORE_BYTE (c1);
1363   ONE_MORE_BYTE (c2);
1364   if ((c1 == 0xFF) && (c2 == 0xFE))
1365     {
1366       detect_info->found |= (CATEGORY_MASK_UTF_16_LE
1367                              | CATEGORY_MASK_UTF_16_AUTO);
1368       detect_info->rejected |= (CATEGORY_MASK_UTF_16_BE
1369                                 | CATEGORY_MASK_UTF_16_BE_NOSIG
1370                                 | CATEGORY_MASK_UTF_16_LE_NOSIG);
1371     }
1372   else if ((c1 == 0xFE) && (c2 == 0xFF))
1373     {
1374       detect_info->found |= (CATEGORY_MASK_UTF_16_BE
1375                              | CATEGORY_MASK_UTF_16_AUTO);
1376       detect_info->rejected |= (CATEGORY_MASK_UTF_16_LE
1377                                 | CATEGORY_MASK_UTF_16_BE_NOSIG
1378                                 | CATEGORY_MASK_UTF_16_LE_NOSIG);
1379     }
1380   else if (c1 >= 0 && c2 >= 0)
1381     {
1382       unsigned char b1[256], b2[256];
1383       int b1_variants = 1, b2_variants = 1;
1384       int n;
1385
1386       bzero (b1, 256), bzero (b2, 256);
1387       b1[c1]++, b2[c2]++;
1388       for (n = 0; n < 256 && src < src_end; n++)
1389         {
1390           src_base = src;
1391           ONE_MORE_BYTE (c1);
1392           ONE_MORE_BYTE (c2);
1393           if (c1 < 0 || c2 < 0)
1394             break;
1395           if (! b1[c1++]) b1_variants++;
1396           if (! b2[c2++]) b2_variants++;
1397         }
1398       if (b1_variants < b2_variants)
1399         detect_info->found |= CATEGORY_MASK_UTF_16_BE_NOSIG;
1400       else
1401         detect_info->found |= CATEGORY_MASK_UTF_16_LE_NOSIG;
1402       detect_info->rejected
1403         |= (CATEGORY_MASK_UTF_16_BE | CATEGORY_MASK_UTF_16_LE);
1404     }
1405  no_more_source:
1406   return 1;
1407 }
1408
1409 static void
1410 decode_coding_utf_16 (coding)
1411      struct coding_system *coding;
1412 {
1413   const unsigned char *src = coding->source + coding->consumed;
1414   const unsigned char *src_end = coding->source + coding->src_bytes;
1415   const unsigned char *src_base;
1416   int *charbuf = coding->charbuf;
1417   int *charbuf_end = charbuf + coding->charbuf_size;
1418   int consumed_chars = 0, consumed_chars_base;
1419   int multibytep = coding->src_multibyte;
1420   enum utf_16_bom_type bom = CODING_UTF_16_BOM (coding);
1421   enum utf_16_endian_type endian = CODING_UTF_16_ENDIAN (coding);
1422   int surrogate = CODING_UTF_16_SURROGATE (coding);
1423   Lisp_Object attr, charset_list;
1424
1425   CODING_GET_INFO (coding, attr, charset_list);
1426
1427   if (bom == utf_16_with_bom)
1428     {
1429       int c, c1, c2;
1430
1431       src_base = src;
1432       ONE_MORE_BYTE (c1);
1433       ONE_MORE_BYTE (c2);
1434       c = (c1 << 8) | c2;
1435
1436       if (endian == utf_16_big_endian
1437           ? c != 0xFEFF : c != 0xFFFE)
1438         {
1439           /* The first two bytes are not BOM.  Treat them as bytes
1440              for a normal character.  */
1441           src = src_base;
1442           coding->errors++;
1443         }
1444       CODING_UTF_16_BOM (coding) = utf_16_without_bom;
1445     }
1446   else if (bom == utf_16_detect_bom)
1447     {
1448       /* We have already tried to detect BOM and failed in
1449          detect_coding.  */
1450       CODING_UTF_16_BOM (coding) = utf_16_without_bom;
1451     }
1452
1453   while (1)
1454     {
1455       int c, c1, c2;
1456
1457       src_base = src;
1458       consumed_chars_base = consumed_chars;
1459
1460       if (charbuf + 2 >= charbuf_end)
1461         break;
1462
1463       ONE_MORE_BYTE (c1);
1464       if (c1 < 0)
1465         {
1466           *charbuf++ = -c1;
1467           continue;
1468         }
1469       ONE_MORE_BYTE (c2);
1470       if (c2 < 0)
1471         {
1472           *charbuf++ = ASCII_BYTE_P (c1) ? c1 : BYTE8_TO_CHAR (c1);
1473           *charbuf++ = -c2;
1474           continue;
1475         }
1476       c = (endian == utf_16_big_endian
1477            ? ((c1 << 8) | c2) : ((c2 << 8) | c1));
1478       if (surrogate)
1479         {
1480           if (! UTF_16_LOW_SURROGATE_P (c))
1481             {
1482               if (endian == utf_16_big_endian)
1483                 c1 = surrogate >> 8, c2 = surrogate & 0xFF;
1484               else
1485                 c1 = surrogate & 0xFF, c2 = surrogate >> 8;
1486               *charbuf++ = c1;
1487               *charbuf++ = c2;
1488               coding->errors++;
1489               if (UTF_16_HIGH_SURROGATE_P (c))
1490                 CODING_UTF_16_SURROGATE (coding) = surrogate = c;
1491               else
1492                 *charbuf++ = c;
1493             }
1494           else
1495             {
1496               c = ((surrogate - 0xD800) << 10) | (c - 0xDC00);
1497               CODING_UTF_16_SURROGATE (coding) = surrogate = 0;
1498               *charbuf++ = c;
1499             }
1500         }
1501       else
1502         {
1503           if (UTF_16_HIGH_SURROGATE_P (c))
1504             CODING_UTF_16_SURROGATE (coding) = surrogate = c;
1505           else
1506             *charbuf++ = c;
1507         }
1508     }
1509
1510  no_more_source:
1511   coding->consumed_char += consumed_chars_base;
1512   coding->consumed = src_base - coding->source;
1513   coding->charbuf_used = charbuf - coding->charbuf;
1514 }
1515
1516 static int
1517 encode_coding_utf_16 (coding)
1518      struct coding_system *coding;
1519 {
1520   int multibytep = coding->dst_multibyte;
1521   int *charbuf = coding->charbuf;
1522   int *charbuf_end = charbuf + coding->charbuf_used;
1523   unsigned char *dst = coding->destination + coding->produced;
1524   unsigned char *dst_end = coding->destination + coding->dst_bytes;
1525   int safe_room = 8;
1526   enum utf_16_bom_type bom = CODING_UTF_16_BOM (coding);
1527   int big_endian = CODING_UTF_16_ENDIAN (coding) == utf_16_big_endian;
1528   int produced_chars = 0;
1529   Lisp_Object attrs, charset_list;
1530   int c;
1531
1532   CODING_GET_INFO (coding, attrs, charset_list);
1533
1534   if (bom != utf_16_without_bom)
1535     {
1536       ASSURE_DESTINATION (safe_room);
1537       if (big_endian)
1538         EMIT_TWO_BYTES (0xFE, 0xFF);
1539       else
1540         EMIT_TWO_BYTES (0xFF, 0xFE);
1541       CODING_UTF_16_BOM (coding) = utf_16_without_bom;
1542     }
1543
1544   while (charbuf < charbuf_end)
1545     {
1546       ASSURE_DESTINATION (safe_room);
1547       c = *charbuf++;
1548       if (c >= MAX_UNICODE_CHAR)
1549         c = coding->default_char;
1550
1551       if (c < 0x10000)
1552         {
1553           if (big_endian)
1554             EMIT_TWO_BYTES (c >> 8, c & 0xFF);
1555           else
1556             EMIT_TWO_BYTES (c & 0xFF, c >> 8);
1557         }
1558       else
1559         {
1560           int c1, c2;
1561
1562           c -= 0x10000;
1563           c1 = (c >> 10) + 0xD800;
1564           c2 = (c & 0x3FF) + 0xDC00;
1565           if (big_endian)
1566             EMIT_FOUR_BYTES (c1 >> 8, c1 & 0xFF, c2 >> 8, c2 & 0xFF);
1567           else
1568             EMIT_FOUR_BYTES (c1 & 0xFF, c1 >> 8, c2 & 0xFF, c2 >> 8);
1569         }
1570     }
1571   record_conversion_result (coding, CODING_RESULT_SUCCESS);
1572   coding->produced = dst - coding->destination;
1573   coding->produced_char += produced_chars;
1574   return 0;
1575 }
1576
1577 \f
1578 /*** 6. Old Emacs' internal format (emacs-mule) ***/
1579
1580 /* Emacs' internal format for representation of multiple character
1581    sets is a kind of multi-byte encoding, i.e. characters are
1582    represented by variable-length sequences of one-byte codes.
1583
1584    ASCII characters and control characters (e.g. `tab', `newline') are
1585    represented by one-byte sequences which are their ASCII codes, in
1586    the range 0x00 through 0x7F.
1587
1588    8-bit characters of the range 0x80..0x9F are represented by
1589    two-byte sequences of LEADING_CODE_8_BIT_CONTROL and (their 8-bit
1590    code + 0x20).
1591
1592    8-bit characters of the range 0xA0..0xFF are represented by
1593    one-byte sequences which are their 8-bit code.
1594
1595    The other characters are represented by a sequence of `base
1596    leading-code', optional `extended leading-code', and one or two
1597    `position-code's.  The length of the sequence is determined by the
1598    base leading-code.  Leading-code takes the range 0x81 through 0x9D,
1599    whereas extended leading-code and position-code take the range 0xA0
1600    through 0xFF.  See `charset.h' for more details about leading-code
1601    and position-code.
1602
1603    --- CODE RANGE of Emacs' internal format ---
1604    character set        range
1605    -------------        -----
1606    ascii                0x00..0x7F
1607    eight-bit-control    LEADING_CODE_8_BIT_CONTROL + 0xA0..0xBF
1608    eight-bit-graphic    0xA0..0xBF
1609    ELSE                 0x81..0x9D + [0xA0..0xFF]+
1610    ---------------------------------------------
1611
1612    As this is the internal character representation, the format is
1613    usually not used externally (i.e. in a file or in a data sent to a
1614    process).  But, it is possible to have a text externally in this
1615    format (i.e. by encoding by the coding system `emacs-mule').
1616
1617    In that case, a sequence of one-byte codes has a slightly different
1618    form.
1619
1620    At first, all characters in eight-bit-control are represented by
1621    one-byte sequences which are their 8-bit code.
1622
1623    Next, character composition data are represented by the byte
1624    sequence of the form: 0x80 METHOD BYTES CHARS COMPONENT ...,
1625    where,
1626         METHOD is 0xF0 plus one of composition method (enum
1627         composition_method),
1628
1629         BYTES is 0xA0 plus a byte length of this composition data,
1630
1631         CHARS is 0x20 plus a number of characters composed by this
1632         data,
1633
1634         COMPONENTs are characters of multibye form or composition
1635         rules encoded by two-byte of ASCII codes.
1636
1637    In addition, for backward compatibility, the following formats are
1638    also recognized as composition data on decoding.
1639
1640    0x80 MSEQ ...
1641    0x80 0xFF MSEQ RULE MSEQ RULE ... MSEQ
1642
1643    Here,
1644         MSEQ is a multibyte form but in these special format:
1645           ASCII: 0xA0 ASCII_CODE+0x80,
1646           other: LEADING_CODE+0x20 FOLLOWING-BYTE ...,
1647         RULE is a one byte code of the range 0xA0..0xF0 that
1648         represents a composition rule.
1649   */
1650
1651 char emacs_mule_bytes[256];
1652
1653 int
1654 emacs_mule_char (coding, src, nbytes, nchars, id)
1655      struct coding_system *coding;
1656      const unsigned char *src;
1657      int *nbytes, *nchars, *id;
1658 {
1659   const unsigned char *src_end = coding->source + coding->src_bytes;
1660   const unsigned char *src_base = src;
1661   int multibytep = coding->src_multibyte;
1662   struct charset *charset;
1663   unsigned code;
1664   int c;
1665   int consumed_chars = 0;
1666
1667   ONE_MORE_BYTE (c);
1668   if (c < 0)
1669     {
1670       c = -c;
1671       charset = emacs_mule_charset[0];
1672     }
1673   else
1674     {
1675       switch (emacs_mule_bytes[c])
1676         {
1677         case 2:
1678           if (! (charset = emacs_mule_charset[c]))
1679             goto invalid_code;
1680           ONE_MORE_BYTE (c);
1681           if (c < 0)
1682             goto invalid_code;
1683           code = c & 0x7F;
1684           break;
1685
1686         case 3:
1687           if (c == EMACS_MULE_LEADING_CODE_PRIVATE_11
1688               || c == EMACS_MULE_LEADING_CODE_PRIVATE_12)
1689             {
1690               ONE_MORE_BYTE (c);
1691               if (c < 0 || ! (charset = emacs_mule_charset[c]))
1692                 goto invalid_code;
1693               ONE_MORE_BYTE (c);
1694               if (c < 0)
1695                 goto invalid_code;
1696               code = c & 0x7F;
1697             }
1698           else
1699             {
1700               if (! (charset = emacs_mule_charset[c]))
1701                 goto invalid_code;
1702               ONE_MORE_BYTE (c);
1703               if (c < 0)
1704                 goto invalid_code;
1705               code = (c & 0x7F) << 8;
1706               ONE_MORE_BYTE (c);
1707               if (c < 0)
1708                 goto invalid_code;
1709               code |= c & 0x7F;
1710             }
1711           break;
1712
1713         case 4:
1714           ONE_MORE_BYTE (c);
1715           if (c < 0 || ! (charset = emacs_mule_charset[c]))
1716             goto invalid_code;
1717           ONE_MORE_BYTE (c);
1718           if (c < 0)
1719             goto invalid_code;
1720           code = (c & 0x7F) << 8;
1721           ONE_MORE_BYTE (c);
1722           if (c < 0)
1723             goto invalid_code;
1724           code |= c & 0x7F;
1725           break;
1726
1727         case 1:
1728           code = c;
1729           charset = CHARSET_FROM_ID (ASCII_BYTE_P (code)
1730                                      ? charset_ascii : charset_eight_bit);
1731           break;
1732
1733         default:
1734           abort ();
1735         }
1736       c = DECODE_CHAR (charset, code);
1737       if (c < 0)
1738         goto invalid_code;
1739     }
1740   *nbytes = src - src_base;
1741   *nchars = consumed_chars;
1742   if (id)
1743     *id = charset->id;
1744   return c;
1745
1746  no_more_source:
1747   return -2;
1748
1749  invalid_code:
1750   return -1;
1751 }
1752
1753
1754 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
1755    Check if a text is encoded in `emacs-mule'.  If it is, return 1,
1756    else return 0.  */
1757
1758 static int
1759 detect_coding_emacs_mule (coding, detect_info)
1760      struct coding_system *coding;
1761      struct coding_detection_info *detect_info;
1762 {
1763   const unsigned char *src = coding->source, *src_base;
1764   const unsigned char *src_end = coding->source + coding->src_bytes;
1765   int multibytep = coding->src_multibyte;
1766   int consumed_chars = 0;
1767   int c;
1768   int found = 0;
1769
1770   detect_info->checked |= CATEGORY_MASK_EMACS_MULE;
1771   /* A coding system of this category is always ASCII compatible.  */
1772   src += coding->head_ascii;
1773
1774   while (1)
1775     {
1776       src_base = src;
1777       ONE_MORE_BYTE (c);
1778       if (c < 0)
1779         continue;
1780       if (c == 0x80)
1781         {
1782           /* Perhaps the start of composite character.  We simple skip
1783              it because analyzing it is too heavy for detecting.  But,
1784              at least, we check that the composite character
1785              constitues of more than 4 bytes.  */
1786           const unsigned char *src_base;
1787
1788         repeat:
1789           src_base = src;
1790           do
1791             {
1792               ONE_MORE_BYTE (c);
1793             }
1794           while (c >= 0xA0);
1795
1796           if (src - src_base <= 4)
1797             break;
1798           found = CATEGORY_MASK_EMACS_MULE;
1799           if (c == 0x80)
1800             goto repeat;
1801         }
1802
1803       if (c < 0x80)
1804         {
1805           if (c < 0x20
1806               && (c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO))
1807             break;
1808         }
1809       else
1810         {
1811           const unsigned char *src_base = src - 1;
1812
1813           do
1814             {
1815               ONE_MORE_BYTE (c);
1816             }
1817           while (c >= 0xA0);
1818           if (src - src_base != emacs_mule_bytes[*src_base])
1819             break;
1820           found = CATEGORY_MASK_EMACS_MULE;
1821         }
1822     }
1823   detect_info->rejected |= CATEGORY_MASK_EMACS_MULE;
1824   return 0;
1825
1826  no_more_source:
1827   if (src_base < src && coding->mode & CODING_MODE_LAST_BLOCK)
1828     {
1829       detect_info->rejected |= CATEGORY_MASK_EMACS_MULE;
1830       return 0;
1831     }
1832   detect_info->found |= found;
1833   return 1;
1834 }
1835
1836
1837 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".  */
1838
1839 /* Decode a character represented as a component of composition
1840    sequence of Emacs 20/21 style at SRC.  Set C to that character and
1841    update SRC to the head of next character (or an encoded composition
1842    rule).  If SRC doesn't points a composition component, set C to -1.
1843    If SRC points an invalid byte sequence, global exit by a return
1844    value 0.  */
1845
1846 #define DECODE_EMACS_MULE_COMPOSITION_CHAR(buf)                 \
1847   if (1)                                                        \
1848     {                                                           \
1849       int c;                                                    \
1850       int nbytes, nchars;                                       \
1851                                                                 \
1852       if (src == src_end)                                       \
1853         break;                                                  \
1854       c = emacs_mule_char (coding, src, &nbytes, &nchars, NULL);\
1855       if (c < 0)                                                \
1856         {                                                       \
1857           if (c == -2)                                          \
1858             break;                                              \
1859           goto invalid_code;                                    \
1860         }                                                       \
1861       *buf++ = c;                                               \
1862       src += nbytes;                                            \
1863       consumed_chars += nchars;                                 \
1864     }                                                           \
1865   else
1866
1867
1868 /* Decode a composition rule represented as a component of composition
1869    sequence of Emacs 20 style at SRC.  Store the decoded rule in *BUF,
1870    and increment BUF.  If SRC points an invalid byte sequence, set C
1871    to -1.  */
1872
1873 #define DECODE_EMACS_MULE_COMPOSITION_RULE_20(buf)      \
1874   do {                                                  \
1875     int c, gref, nref;                                  \
1876                                                         \
1877     if (src >= src_end)                                 \
1878       goto invalid_code;                                \
1879     ONE_MORE_BYTE_NO_CHECK (c);                         \
1880     c -= 0x20;                                          \
1881     if (c < 0 || c >= 81)                               \
1882       goto invalid_code;                                \
1883                                                         \
1884     gref = c / 9, nref = c % 9;                         \
1885     *buf++ = COMPOSITION_ENCODE_RULE (gref, nref);      \
1886   } while (0)
1887
1888
1889 /* Decode a composition rule represented as a component of composition
1890    sequence of Emacs 21 style at SRC.  Store the decoded rule in *BUF,
1891    and increment BUF.  If SRC points an invalid byte sequence, set C
1892    to -1.  */
1893
1894 #define DECODE_EMACS_MULE_COMPOSITION_RULE_21(buf)      \
1895   do {                                                  \
1896     int gref, nref;                                     \
1897                                                         \
1898     if (src + 1>= src_end)                              \
1899       goto invalid_code;                                \
1900     ONE_MORE_BYTE_NO_CHECK (gref);                      \
1901     gref -= 0x20;                                       \
1902     ONE_MORE_BYTE_NO_CHECK (nref);                      \
1903     nref -= 0x20;                                       \
1904     if (gref < 0 || gref >= 81                          \
1905         || nref < 0 || nref >= 81)                      \
1906       goto invalid_code;                                \
1907     *buf++ = COMPOSITION_ENCODE_RULE (gref, nref);      \
1908   } while (0)
1909
1910
1911 #define DECODE_EMACS_MULE_21_COMPOSITION(c)                             \
1912   do {                                                                  \
1913     /* Emacs 21 style format.  The first three bytes at SRC are         \
1914        (METHOD - 0xF2), (BYTES - 0xA0), (CHARS - 0xA0), where BYTES is  \
1915        the byte length of this composition information, CHARS is the    \
1916        number of characters composed by this composition.  */           \
1917     enum composition_method method = c - 0xF2;                          \
1918     int *charbuf_base = charbuf;                                        \
1919     int from, to;                                                       \
1920     int consumed_chars_limit;                                           \
1921     int nbytes, nchars;                                                 \
1922                                                                         \
1923     ONE_MORE_BYTE (c);                                                  \
1924     if (c < 0)                                                          \
1925       goto invalid_code;                                                \
1926     nbytes = c - 0xA0;                                                  \
1927     if (nbytes < 3)                                                     \
1928       goto invalid_code;                                                \
1929     ONE_MORE_BYTE (c);                                                  \
1930     if (c < 0)                                                          \
1931       goto invalid_code;                                                \
1932     nchars = c - 0xA0;                                                  \
1933     from = coding->produced + char_offset;                              \
1934     to = from + nchars;                                                 \
1935     ADD_COMPOSITION_DATA (charbuf, from, to, method);                   \
1936     consumed_chars_limit = consumed_chars_base + nbytes;                \
1937     if (method != COMPOSITION_RELATIVE)                                 \
1938       {                                                                 \
1939         int i = 0;                                                      \
1940         while (consumed_chars < consumed_chars_limit)                   \
1941           {                                                             \
1942             if (i % 2 && method != COMPOSITION_WITH_ALTCHARS)           \
1943               DECODE_EMACS_MULE_COMPOSITION_RULE_21 (charbuf);          \
1944             else                                                        \
1945               DECODE_EMACS_MULE_COMPOSITION_CHAR (charbuf);             \
1946             i++;                                                        \
1947           }                                                             \
1948         if (consumed_chars < consumed_chars_limit)                      \
1949           goto invalid_code;                                            \
1950         charbuf_base[0] -= i;                                           \
1951       }                                                                 \
1952   } while (0)
1953
1954
1955 #define DECODE_EMACS_MULE_20_RELATIVE_COMPOSITION(c)            \
1956   do {                                                          \
1957     /* Emacs 20 style format for relative composition.  */      \
1958     /* Store multibyte form of characters to be composed.  */   \
1959     enum composition_method method = COMPOSITION_RELATIVE;      \
1960     int components[MAX_COMPOSITION_COMPONENTS * 2 - 1];         \
1961     int *buf = components;                                      \
1962     int i, j;                                                   \
1963     int from, to;                                               \
1964                                                                 \
1965     src = src_base;                                             \
1966     ONE_MORE_BYTE (c);          /* skip 0x80 */                 \
1967     for (i = 0; i < MAX_COMPOSITION_COMPONENTS; i++)            \
1968       DECODE_EMACS_MULE_COMPOSITION_CHAR (buf);                 \
1969     if (i < 2)                                                  \
1970       goto invalid_code;                                        \
1971     from = coding->produced_char + char_offset;                 \
1972     to = from + i;                                              \
1973     ADD_COMPOSITION_DATA (charbuf, from, to, method);           \
1974     for (j = 0; j < i; j++)                                     \
1975       *charbuf++ = components[j];                               \
1976   } while (0)
1977
1978
1979 #define DECODE_EMACS_MULE_20_RULEBASE_COMPOSITION(c)            \
1980   do {                                                          \
1981     /* Emacs 20 style format for rule-base composition.  */     \
1982     /* Store multibyte form of characters to be composed.  */   \
1983     enum composition_method method = COMPOSITION_WITH_RULE;     \
1984     int components[MAX_COMPOSITION_COMPONENTS * 2 - 1];         \
1985     int *buf = components;                                      \
1986     int i, j;                                                   \
1987     int from, to;                                               \
1988                                                                 \
1989     DECODE_EMACS_MULE_COMPOSITION_CHAR (buf);                   \
1990     for (i = 0; i < MAX_COMPOSITION_COMPONENTS; i++)            \
1991       {                                                         \
1992         DECODE_EMACS_MULE_COMPOSITION_RULE_20 (buf);            \
1993         DECODE_EMACS_MULE_COMPOSITION_CHAR (buf);               \
1994       }                                                         \
1995     if (i < 1 || (buf - components) % 2 == 0)                   \
1996       goto invalid_code;                                        \
1997     if (charbuf + i + (i / 2) + 1 < charbuf_end)                \
1998       goto no_more_source;                                      \
1999     from = coding->produced_char + char_offset;                 \
2000     to = from + i;                                              \
2001     ADD_COMPOSITION_DATA (buf, from, to, method);               \
2002     for (j = 0; j < i; j++)                                     \
2003       *charbuf++ = components[j];                               \
2004     for (j = 0; j < i; j += 2)                                  \
2005       *charbuf++ = components[j];                               \
2006   } while (0)
2007
2008
2009 static void
2010 decode_coding_emacs_mule (coding)
2011      struct coding_system *coding;
2012 {
2013   const unsigned char *src = coding->source + coding->consumed;
2014   const unsigned char *src_end = coding->source + coding->src_bytes;
2015   const unsigned char *src_base;
2016   int *charbuf = coding->charbuf;
2017   int *charbuf_end = charbuf + coding->charbuf_size - MAX_ANNOTATION_LENGTH;
2018   int consumed_chars = 0, consumed_chars_base;
2019   int multibytep = coding->src_multibyte;
2020   Lisp_Object attrs, charset_list;
2021   int char_offset = coding->produced_char;
2022   int last_offset = char_offset;
2023   int last_id = charset_ascii;
2024
2025   CODING_GET_INFO (coding, attrs, charset_list);
2026
2027   while (1)
2028     {
2029       int c;
2030
2031       src_base = src;
2032       consumed_chars_base = consumed_chars;
2033
2034       if (charbuf >= charbuf_end)
2035         break;
2036
2037       ONE_MORE_BYTE (c);
2038       if (c < 0)
2039         {
2040           *charbuf++ = -c;
2041           char_offset++;
2042         }
2043       else if (c < 0x80)
2044         {
2045           *charbuf++ = c;
2046           char_offset++;
2047         }
2048       else if (c == 0x80)
2049         {
2050           ONE_MORE_BYTE (c);
2051           if (c < 0)
2052             goto invalid_code;
2053           if (c - 0xF2 >= COMPOSITION_RELATIVE
2054               && c - 0xF2 <= COMPOSITION_WITH_RULE_ALTCHARS)
2055             DECODE_EMACS_MULE_21_COMPOSITION (c);
2056           else if (c < 0xC0)
2057             DECODE_EMACS_MULE_20_RELATIVE_COMPOSITION (c);
2058           else if (c == 0xFF)
2059             DECODE_EMACS_MULE_20_RULEBASE_COMPOSITION (c);
2060           else
2061             goto invalid_code;
2062         }
2063       else if (c < 0xA0 && emacs_mule_bytes[c] > 1)
2064         {
2065           int nbytes, nchars;
2066           int id;
2067
2068           src = src_base;
2069           consumed_chars = consumed_chars_base;
2070           c = emacs_mule_char (coding, src, &nbytes, &nchars, &id);
2071           if (c < 0)
2072             {
2073               if (c == -2)
2074                 break;
2075               goto invalid_code;
2076             }
2077           if (last_id != id)
2078             {
2079               if (last_id != charset_ascii)
2080                 ADD_CHARSET_DATA (charbuf, last_offset, char_offset, last_id);
2081               last_id = id;
2082               last_offset = char_offset;
2083             }
2084           *charbuf++ = c;
2085           src += nbytes;
2086           consumed_chars += nchars;
2087           char_offset++;
2088         }
2089       continue;
2090
2091     invalid_code:
2092       src = src_base;
2093       consumed_chars = consumed_chars_base;
2094       ONE_MORE_BYTE (c);
2095       *charbuf++ = ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
2096       char_offset++;
2097       coding->errors++;
2098     }
2099
2100  no_more_source:
2101   if (last_id != charset_ascii)
2102     ADD_CHARSET_DATA (charbuf, last_offset, char_offset, last_id);
2103   coding->consumed_char += consumed_chars_base;
2104   coding->consumed = src_base - coding->source;
2105   coding->charbuf_used = charbuf - coding->charbuf;
2106 }
2107
2108
2109 #define EMACS_MULE_LEADING_CODES(id, codes)     \
2110   do {                                          \
2111     if (id < 0xA0)                              \
2112       codes[0] = id, codes[1] = 0;              \
2113     else if (id < 0xE0)                         \
2114       codes[0] = 0x9A, codes[1] = id;           \
2115     else if (id < 0xF0)                         \
2116       codes[0] = 0x9B, codes[1] = id;           \
2117     else if (id < 0xF5)                         \
2118       codes[0] = 0x9C, codes[1] = id;           \
2119     else                                        \
2120       codes[0] = 0x9D, codes[1] = id;           \
2121   } while (0);
2122
2123
2124 static int
2125 encode_coding_emacs_mule (coding)
2126      struct coding_system *coding;
2127 {
2128   int multibytep = coding->dst_multibyte;
2129   int *charbuf = coding->charbuf;
2130   int *charbuf_end = charbuf + coding->charbuf_used;
2131   unsigned char *dst = coding->destination + coding->produced;
2132   unsigned char *dst_end = coding->destination + coding->dst_bytes;
2133   int safe_room = 8;
2134   int produced_chars = 0;
2135   Lisp_Object attrs, charset_list;
2136   int c;
2137   int preferred_charset_id = -1;
2138
2139   CODING_GET_INFO (coding, attrs, charset_list);
2140   if (! EQ (charset_list, Vemacs_mule_charset_list))
2141     {
2142       CODING_ATTR_CHARSET_LIST (attrs)
2143         = charset_list = Vemacs_mule_charset_list;
2144     }
2145
2146   while (charbuf < charbuf_end)
2147     {
2148       ASSURE_DESTINATION (safe_room);
2149       c = *charbuf++;
2150
2151       if (c < 0)
2152         {
2153           /* Handle an annotation.  */
2154           switch (*charbuf)
2155             {
2156             case CODING_ANNOTATE_COMPOSITION_MASK:
2157               /* Not yet implemented.  */
2158               break;
2159             case CODING_ANNOTATE_CHARSET_MASK:
2160               preferred_charset_id = charbuf[3];
2161               if (preferred_charset_id >= 0
2162                   && NILP (Fmemq (make_number (preferred_charset_id),
2163                                   charset_list)))
2164                 preferred_charset_id = -1;
2165               break;
2166             default:
2167               abort ();
2168             }
2169           charbuf += -c - 1;
2170           continue;
2171         }
2172
2173       if (ASCII_CHAR_P (c))
2174         EMIT_ONE_ASCII_BYTE (c);
2175       else if (CHAR_BYTE8_P (c))
2176         {
2177           c = CHAR_TO_BYTE8 (c);
2178           EMIT_ONE_BYTE (c);
2179         }
2180       else
2181         {
2182           struct charset *charset;
2183           unsigned code;
2184           int dimension;
2185           int emacs_mule_id;
2186           unsigned char leading_codes[2];
2187
2188           if (preferred_charset_id >= 0)
2189             {
2190               charset = CHARSET_FROM_ID (preferred_charset_id);
2191               if (! CHAR_CHARSET_P (c, charset))
2192                 charset = char_charset (c, charset_list, NULL);
2193             }
2194           else
2195             charset = char_charset (c, charset_list, &code);
2196           if (! charset)
2197             {
2198               c = coding->default_char;
2199               if (ASCII_CHAR_P (c))
2200                 {
2201                   EMIT_ONE_ASCII_BYTE (c);
2202                   continue;
2203                 }
2204               charset = char_charset (c, charset_list, &code);
2205             }
2206           dimension = CHARSET_DIMENSION (charset);
2207           emacs_mule_id = CHARSET_EMACS_MULE_ID (charset);
2208           EMACS_MULE_LEADING_CODES (emacs_mule_id, leading_codes);
2209           EMIT_ONE_BYTE (leading_codes[0]);
2210           if (leading_codes[1])
2211             EMIT_ONE_BYTE (leading_codes[1]);
2212           if (dimension == 1)
2213             EMIT_ONE_BYTE (code | 0x80);
2214           else
2215             {
2216               code |= 0x8080;
2217               EMIT_ONE_BYTE (code >> 8);
2218               EMIT_ONE_BYTE (code & 0xFF);
2219             }
2220         }
2221     }
2222   record_conversion_result (coding, CODING_RESULT_SUCCESS);
2223   coding->produced_char += produced_chars;
2224   coding->produced = dst - coding->destination;
2225   return 0;
2226 }
2227
2228 \f
2229 /*** 7. ISO2022 handlers ***/
2230
2231 /* The following note describes the coding system ISO2022 briefly.
2232    Since the intention of this note is to help understand the
2233    functions in this file, some parts are NOT ACCURATE or are OVERLY
2234    SIMPLIFIED.  For thorough understanding, please refer to the
2235    original document of ISO2022.  This is equivalent to the standard
2236    ECMA-35, obtainable from <URL:http://www.ecma.ch/> (*).
2237
2238    ISO2022 provides many mechanisms to encode several character sets
2239    in 7-bit and 8-bit environments.  For 7-bit environments, all text
2240    is encoded using bytes less than 128.  This may make the encoded
2241    text a little bit longer, but the text passes more easily through
2242    several types of gateway, some of which strip off the MSB (Most
2243    Significant Bit).
2244
2245    There are two kinds of character sets: control character sets and
2246    graphic character sets.  The former contain control characters such
2247    as `newline' and `escape' to provide control functions (control
2248    functions are also provided by escape sequences).  The latter
2249    contain graphic characters such as 'A' and '-'.  Emacs recognizes
2250    two control character sets and many graphic character sets.
2251
2252    Graphic character sets are classified into one of the following
2253    four classes, according to the number of bytes (DIMENSION) and
2254    number of characters in one dimension (CHARS) of the set:
2255    - DIMENSION1_CHARS94
2256    - DIMENSION1_CHARS96
2257    - DIMENSION2_CHARS94
2258    - DIMENSION2_CHARS96
2259
2260    In addition, each character set is assigned an identification tag,
2261    unique for each set, called the "final character" (denoted as <F>
2262    hereafter).  The <F> of each character set is decided by ECMA(*)
2263    when it is registered in ISO.  The code range of <F> is 0x30..0x7F
2264    (0x30..0x3F are for private use only).
2265
2266    Note (*): ECMA = European Computer Manufacturers Association
2267
2268    Here are examples of graphic character sets [NAME(<F>)]:
2269         o DIMENSION1_CHARS94 -- ASCII('B'), right-half-of-JISX0201('I'), ...
2270         o DIMENSION1_CHARS96 -- right-half-of-ISO8859-1('A'), ...
2271         o DIMENSION2_CHARS94 -- GB2312('A'), JISX0208('B'), ...
2272         o DIMENSION2_CHARS96 -- none for the moment
2273
2274    A code area (1 byte=8 bits) is divided into 4 areas, C0, GL, C1, and GR.
2275         C0 [0x00..0x1F] -- control character plane 0
2276         GL [0x20..0x7F] -- graphic character plane 0
2277         C1 [0x80..0x9F] -- control character plane 1
2278         GR [0xA0..0xFF] -- graphic character plane 1
2279
2280    A control character set is directly designated and invoked to C0 or
2281    C1 by an escape sequence.  The most common case is that:
2282    - ISO646's  control character set is designated/invoked to C0, and
2283    - ISO6429's control character set is designated/invoked to C1,
2284    and usually these designations/invocations are omitted in encoded
2285    text.  In a 7-bit environment, only C0 can be used, and a control
2286    character for C1 is encoded by an appropriate escape sequence to
2287    fit into the environment.  All control characters for C1 are
2288    defined to have corresponding escape sequences.
2289
2290    A graphic character set is at first designated to one of four
2291    graphic registers (G0 through G3), then these graphic registers are
2292    invoked to GL or GR.  These designations and invocations can be
2293    done independently.  The most common case is that G0 is invoked to
2294    GL, G1 is invoked to GR, and ASCII is designated to G0.  Usually
2295    these invocations and designations are omitted in encoded text.
2296    In a 7-bit environment, only GL can be used.
2297
2298    When a graphic character set of CHARS94 is invoked to GL, codes
2299    0x20 and 0x7F of the GL area work as control characters SPACE and
2300    DEL respectively, and codes 0xA0 and 0xFF of the GR area should not
2301    be used.
2302
2303    There are two ways of invocation: locking-shift and single-shift.
2304    With locking-shift, the invocation lasts until the next different
2305    invocation, whereas with single-shift, the invocation affects the
2306    following character only and doesn't affect the locking-shift
2307    state.  Invocations are done by the following control characters or
2308    escape sequences:
2309
2310    ----------------------------------------------------------------------
2311    abbrev  function                  cntrl escape seq   description
2312    ----------------------------------------------------------------------
2313    SI/LS0  (shift-in)                0x0F  none         invoke G0 into GL
2314    SO/LS1  (shift-out)               0x0E  none         invoke G1 into GL
2315    LS2     (locking-shift-2)         none  ESC 'n'      invoke G2 into GL
2316    LS3     (locking-shift-3)         none  ESC 'o'      invoke G3 into GL
2317    LS1R    (locking-shift-1 right)   none  ESC '~'      invoke G1 into GR (*)
2318    LS2R    (locking-shift-2 right)   none  ESC '}'      invoke G2 into GR (*)
2319    LS3R    (locking-shift 3 right)   none  ESC '|'      invoke G3 into GR (*)
2320    SS2     (single-shift-2)          0x8E  ESC 'N'      invoke G2 for one char
2321    SS3     (single-shift-3)          0x8F  ESC 'O'      invoke G3 for one char
2322    ----------------------------------------------------------------------
2323    (*) These are not used by any known coding system.
2324
2325    Control characters for these functions are defined by macros
2326    ISO_CODE_XXX in `coding.h'.
2327
2328    Designations are done by the following escape sequences:
2329    ----------------------------------------------------------------------
2330    escape sequence      description
2331    ----------------------------------------------------------------------
2332    ESC '(' <F>          designate DIMENSION1_CHARS94<F> to G0
2333    ESC ')' <F>          designate DIMENSION1_CHARS94<F> to G1
2334    ESC '*' <F>          designate DIMENSION1_CHARS94<F> to G2
2335    ESC '+' <F>          designate DIMENSION1_CHARS94<F> to G3
2336    ESC ',' <F>          designate DIMENSION1_CHARS96<F> to G0 (*)
2337    ESC '-' <F>          designate DIMENSION1_CHARS96<F> to G1
2338    ESC '.' <F>          designate DIMENSION1_CHARS96<F> to G2
2339    ESC '/' <F>          designate DIMENSION1_CHARS96<F> to G3
2340    ESC '$' '(' <F>      designate DIMENSION2_CHARS94<F> to G0 (**)
2341    ESC '$' ')' <F>      designate DIMENSION2_CHARS94<F> to G1
2342    ESC '$' '*' <F>      designate DIMENSION2_CHARS94<F> to G2
2343    ESC '$' '+' <F>      designate DIMENSION2_CHARS94<F> to G3
2344    ESC '$' ',' <F>      designate DIMENSION2_CHARS96<F> to G0 (*)
2345    ESC '$' '-' <F>      designate DIMENSION2_CHARS96<F> to G1
2346    ESC '$' '.' <F>      designate DIMENSION2_CHARS96<F> to G2
2347    ESC '$' '/' <F>      designate DIMENSION2_CHARS96<F> to G3
2348    ----------------------------------------------------------------------
2349
2350    In this list, "DIMENSION1_CHARS94<F>" means a graphic character set
2351    of dimension 1, chars 94, and final character <F>, etc...
2352
2353    Note (*): Although these designations are not allowed in ISO2022,
2354    Emacs accepts them on decoding, and produces them on encoding
2355    CHARS96 character sets in a coding system which is characterized as
2356    7-bit environment, non-locking-shift, and non-single-shift.
2357
2358    Note (**): If <F> is '@', 'A', or 'B', the intermediate character
2359    '(' must be omitted.  We refer to this as "short-form" hereafter.
2360
2361    Now you may notice that there are a lot of ways of encoding the
2362    same multilingual text in ISO2022.  Actually, there exist many
2363    coding systems such as Compound Text (used in X11's inter client
2364    communication, ISO-2022-JP (used in Japanese Internet), ISO-2022-KR
2365    (used in Korean Internet), EUC (Extended UNIX Code, used in Asian
2366    localized platforms), and all of these are variants of ISO2022.
2367
2368    In addition to the above, Emacs handles two more kinds of escape
2369    sequences: ISO6429's direction specification and Emacs' private
2370    sequence for specifying character composition.
2371
2372    ISO6429's direction specification takes the following form:
2373         o CSI ']'      -- end of the current direction
2374         o CSI '0' ']'  -- end of the current direction
2375         o CSI '1' ']'  -- start of left-to-right text
2376         o CSI '2' ']'  -- start of right-to-left text
2377    The control character CSI (0x9B: control sequence introducer) is
2378    abbreviated to the escape sequence ESC '[' in a 7-bit environment.
2379
2380    Character composition specification takes the following form:
2381         o ESC '0' -- start relative composition
2382         o ESC '1' -- end composition
2383         o ESC '2' -- start rule-base composition (*)
2384         o ESC '3' -- start relative composition with alternate chars  (**)
2385         o ESC '4' -- start rule-base composition with alternate chars  (**)
2386   Since these are not standard escape sequences of any ISO standard,
2387   the use of them with these meanings is restricted to Emacs only.
2388
2389   (*) This form is used only in Emacs 20.7 and older versions,
2390   but newer versions can safely decode it.
2391   (**) This form is used only in Emacs 21.1 and newer versions,
2392   and older versions can't decode it.
2393
2394   Here's a list of example usages of these composition escape
2395   sequences (categorized by `enum composition_method').
2396
2397   COMPOSITION_RELATIVE:
2398         ESC 0 CHAR [ CHAR ] ESC 1
2399   COMPOSITION_WITH_RULE:
2400         ESC 2 CHAR [ RULE CHAR ] ESC 1
2401   COMPOSITION_WITH_ALTCHARS:
2402         ESC 3 ALTCHAR [ ALTCHAR ] ESC 0 CHAR [ CHAR ] ESC 1
2403   COMPOSITION_WITH_RULE_ALTCHARS:
2404         ESC 4 ALTCHAR [ RULE ALTCHAR ] ESC 0 CHAR [ CHAR ] ESC 1 */
2405
2406 enum iso_code_class_type iso_code_class[256];
2407
2408 #define SAFE_CHARSET_P(coding, id)      \
2409   ((id) <= (coding)->max_charset_id     \
2410    && (coding)->safe_charsets[id] >= 0)
2411
2412
2413 #define SHIFT_OUT_OK(category)  \
2414   (CODING_ISO_INITIAL (&coding_categories[category], 1) >= 0)
2415
2416 static void
2417 setup_iso_safe_charsets (attrs)
2418      Lisp_Object attrs;
2419 {
2420   Lisp_Object charset_list, safe_charsets;
2421   Lisp_Object request;
2422   Lisp_Object reg_usage;
2423   Lisp_Object tail;
2424   int reg94, reg96;
2425   int flags = XINT (AREF (attrs, coding_attr_iso_flags));
2426   int max_charset_id;
2427
2428   charset_list = CODING_ATTR_CHARSET_LIST (attrs);
2429   if ((flags & CODING_ISO_FLAG_FULL_SUPPORT)
2430       && ! EQ (charset_list, Viso_2022_charset_list))
2431     {
2432       CODING_ATTR_CHARSET_LIST (attrs)
2433         = charset_list = Viso_2022_charset_list;
2434       ASET (attrs, coding_attr_safe_charsets, Qnil);
2435     }
2436
2437   if (STRINGP (AREF (attrs, coding_attr_safe_charsets)))
2438     return;
2439
2440   max_charset_id = 0;
2441   for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
2442     {
2443       int id = XINT (XCAR (tail));
2444       if (max_charset_id < id)
2445         max_charset_id = id;
2446     }
2447
2448   safe_charsets = Fmake_string (make_number (max_charset_id + 1),
2449                                 make_number (255));
2450   request = AREF (attrs, coding_attr_iso_request);
2451   reg_usage = AREF (attrs, coding_attr_iso_usage);
2452   reg94 = XINT (XCAR (reg_usage));
2453   reg96 = XINT (XCDR (reg_usage));
2454
2455   for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
2456     {
2457       Lisp_Object id;
2458       Lisp_Object reg;
2459       struct charset *charset;
2460
2461       id = XCAR (tail);
2462       charset = CHARSET_FROM_ID (XINT (id));
2463       reg = Fcdr (Fassq (id, request));
2464       if (! NILP (reg))
2465         SSET (safe_charsets, XINT (id), XINT (reg));
2466       else if (charset->iso_chars_96)
2467         {
2468           if (reg96 < 4)
2469             SSET (safe_charsets, XINT (id), reg96);
2470         }
2471       else
2472         {
2473           if (reg94 < 4)
2474             SSET (safe_charsets, XINT (id), reg94);
2475         }
2476     }
2477   ASET (attrs, coding_attr_safe_charsets, safe_charsets);
2478 }
2479
2480
2481 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
2482    Check if a text is encoded in one of ISO-2022 based codig systems.
2483    If it is, return 1, else return 0.  */
2484
2485 static int
2486 detect_coding_iso_2022 (coding, detect_info)
2487      struct coding_system *coding;
2488      struct coding_detection_info *detect_info;
2489 {
2490   const unsigned char *src = coding->source, *src_base = src;
2491   const unsigned char *src_end = coding->source + coding->src_bytes;
2492   int multibytep = coding->src_multibyte;
2493   int single_shifting = 0;
2494   int id;
2495   int c, c1;
2496   int consumed_chars = 0;
2497   int i;
2498   int rejected = 0;
2499   int found = 0;
2500
2501   detect_info->checked |= CATEGORY_MASK_ISO;
2502
2503   for (i = coding_category_iso_7; i <= coding_category_iso_8_else; i++)
2504     {
2505       struct coding_system *this = &(coding_categories[i]);
2506       Lisp_Object attrs, val;
2507
2508       attrs = CODING_ID_ATTRS (this->id);
2509       if (CODING_ISO_FLAGS (this) & CODING_ISO_FLAG_FULL_SUPPORT
2510           && ! EQ (CODING_ATTR_SAFE_CHARSETS (attrs), Viso_2022_charset_list))
2511         setup_iso_safe_charsets (attrs);
2512       val = CODING_ATTR_SAFE_CHARSETS (attrs);
2513       this->max_charset_id = SCHARS (val) - 1;
2514       this->safe_charsets = (char *) SDATA (val);
2515     }
2516
2517   /* A coding system of this category is always ASCII compatible.  */
2518   src += coding->head_ascii;
2519
2520   while (rejected != CATEGORY_MASK_ISO)
2521     {
2522       src_base = src;
2523       ONE_MORE_BYTE (c);
2524       switch (c)
2525         {
2526         case ISO_CODE_ESC:
2527           if (inhibit_iso_escape_detection)
2528             break;
2529           single_shifting = 0;
2530           ONE_MORE_BYTE (c);
2531           if (c >= '(' && c <= '/')
2532             {
2533               /* Designation sequence for a charset of dimension 1.  */
2534               ONE_MORE_BYTE (c1);
2535               if (c1 < ' ' || c1 >= 0x80
2536                   || (id = iso_charset_table[0][c >= ','][c1]) < 0)
2537                 /* Invalid designation sequence.  Just ignore.  */
2538                 break;
2539             }
2540           else if (c == '$')
2541             {
2542               /* Designation sequence for a charset of dimension 2.  */
2543               ONE_MORE_BYTE (c);
2544               if (c >= '@' && c <= 'B')
2545                 /* Designation for JISX0208.1978, GB2312, or JISX0208.  */
2546                 id = iso_charset_table[1][0][c];
2547               else if (c >= '(' && c <= '/')
2548                 {
2549                   ONE_MORE_BYTE (c1);
2550                   if (c1 < ' ' || c1 >= 0x80
2551                       || (id = iso_charset_table[1][c >= ','][c1]) < 0)
2552                     /* Invalid designation sequence.  Just ignore.  */
2553                     break;
2554                 }
2555               else
2556                 /* Invalid designation sequence.  Just ignore it.  */
2557                 break;
2558             }
2559           else if (c == 'N' || c == 'O')
2560             {
2561               /* ESC <Fe> for SS2 or SS3.  */
2562               single_shifting = 1;
2563               rejected |= CATEGORY_MASK_ISO_7BIT | CATEGORY_MASK_ISO_8BIT;
2564               break;
2565             }
2566           else if (c >= '0' && c <= '4')
2567             {
2568               /* ESC <Fp> for start/end composition.  */
2569               found |= CATEGORY_MASK_ISO;
2570               break;
2571             }
2572           else
2573             {
2574               /* Invalid escape sequence.  Just ignore it.  */
2575               break;
2576             }
2577
2578           /* We found a valid designation sequence for CHARSET.  */
2579           rejected |= CATEGORY_MASK_ISO_8BIT;
2580           if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_7],
2581                               id))
2582             found |= CATEGORY_MASK_ISO_7;
2583           else
2584             rejected |= CATEGORY_MASK_ISO_7;
2585           if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_7_tight],
2586                               id))
2587             found |= CATEGORY_MASK_ISO_7_TIGHT;
2588           else
2589             rejected |= CATEGORY_MASK_ISO_7_TIGHT;
2590           if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_7_else],
2591                               id))
2592             found |= CATEGORY_MASK_ISO_7_ELSE;
2593           else
2594             rejected |= CATEGORY_MASK_ISO_7_ELSE;
2595           if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_8_else],
2596                               id))
2597             found |= CATEGORY_MASK_ISO_8_ELSE;
2598           else
2599             rejected |= CATEGORY_MASK_ISO_8_ELSE;
2600           break;
2601
2602         case ISO_CODE_SO:
2603         case ISO_CODE_SI:
2604           /* Locking shift out/in.  */
2605           if (inhibit_iso_escape_detection)
2606             break;
2607           single_shifting = 0;
2608           rejected |= CATEGORY_MASK_ISO_7BIT | CATEGORY_MASK_ISO_8BIT;
2609           found |= CATEGORY_MASK_ISO_ELSE;
2610           break;
2611
2612         case ISO_CODE_CSI:
2613           /* Control sequence introducer.  */
2614           single_shifting = 0;
2615           rejected |= CATEGORY_MASK_ISO_7BIT | CATEGORY_MASK_ISO_7_ELSE;
2616           found |= CATEGORY_MASK_ISO_8_ELSE;
2617           goto check_extra_latin;
2618
2619
2620         case ISO_CODE_SS2:
2621         case ISO_CODE_SS3:
2622           /* Single shift.   */
2623           if (inhibit_iso_escape_detection)
2624             break;
2625           single_shifting = 1;
2626           rejected |= CATEGORY_MASK_ISO_7BIT;
2627           if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_1])
2628               & CODING_ISO_FLAG_SINGLE_SHIFT)
2629             found |= CATEGORY_MASK_ISO_8_1;
2630           if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_2])
2631               & CODING_ISO_FLAG_SINGLE_SHIFT)
2632             found |= CATEGORY_MASK_ISO_8_2;
2633           goto check_extra_latin;
2634
2635         default:
2636           if (c < 0)
2637             continue;
2638           if (c < 0x80)
2639             {
2640               single_shifting = 0;
2641               break;
2642             }
2643           if (c >= 0xA0)
2644             {
2645               rejected |= CATEGORY_MASK_ISO_7BIT | CATEGORY_MASK_ISO_7_ELSE;
2646               found |= CATEGORY_MASK_ISO_8_1;
2647               /* Check the length of succeeding codes of the range
2648                  0xA0..0FF.  If the byte length is even, we include
2649                  CATEGORY_MASK_ISO_8_2 in `found'.  We can check this
2650                  only when we are not single shifting.  */
2651               if (! single_shifting
2652                   && ! (rejected & CATEGORY_MASK_ISO_8_2))
2653                 {
2654                   int i = 1;
2655                   while (src < src_end)
2656                     {
2657                       ONE_MORE_BYTE (c);
2658                       if (c < 0xA0)
2659                         break;
2660                       i++;
2661                     }
2662
2663                   if (i & 1 && src < src_end)
2664                     rejected |= CATEGORY_MASK_ISO_8_2;
2665                   else
2666                     found |= CATEGORY_MASK_ISO_8_2;
2667                 }
2668               break;
2669             }
2670         check_extra_latin:
2671           single_shifting = 0;
2672           if (! VECTORP (Vlatin_extra_code_table)
2673               || NILP (XVECTOR (Vlatin_extra_code_table)->contents[c]))
2674             {
2675               rejected = CATEGORY_MASK_ISO;
2676               break;
2677             }
2678           if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_1])
2679               & CODING_ISO_FLAG_LATIN_EXTRA)
2680             found |= CATEGORY_MASK_ISO_8_1;
2681           else
2682             rejected |= CATEGORY_MASK_ISO_8_1;
2683           if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_2])
2684               & CODING_ISO_FLAG_LATIN_EXTRA)
2685             found |= CATEGORY_MASK_ISO_8_2;
2686           else
2687             rejected |= CATEGORY_MASK_ISO_8_2;
2688         }
2689     }
2690   detect_info->rejected |= CATEGORY_MASK_ISO;
2691   return 0;
2692
2693  no_more_source:
2694   detect_info->rejected |= rejected;
2695   detect_info->found |= (found & ~rejected);
2696   return 1;
2697 }
2698
2699
2700 /* Set designation state into CODING.  */
2701 #define DECODE_DESIGNATION(reg, dim, chars_96, final)                   \
2702   do {                                                                  \
2703     int id, prev;                                                       \
2704                                                                         \
2705     if (final < '0' || final >= 128                                     \
2706         || ((id = ISO_CHARSET_TABLE (dim, chars_96, final)) < 0)        \
2707         || !SAFE_CHARSET_P (coding, id))                                \
2708       {                                                                 \
2709         CODING_ISO_DESIGNATION (coding, reg) = -2;                      \
2710         goto invalid_code;                                              \
2711       }                                                                 \
2712     prev = CODING_ISO_DESIGNATION (coding, reg);                        \
2713     if (id == charset_jisx0201_roman)                                   \
2714       {                                                                 \
2715         if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_USE_ROMAN)      \
2716           id = charset_ascii;                                           \
2717       }                                                                 \
2718     else if (id == charset_jisx0208_1978)                               \
2719       {                                                                 \
2720         if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_USE_OLDJIS)     \
2721           id = charset_jisx0208;                                        \
2722       }                                                                 \
2723     CODING_ISO_DESIGNATION (coding, reg) = id;                          \
2724     /* If there was an invalid designation to REG previously, and this  \
2725        designation is ASCII to REG, we should keep this designation     \
2726        sequence.  */                                                    \
2727     if (prev == -2 && id == charset_ascii)                              \
2728       goto invalid_code;                                                \
2729   } while (0)
2730
2731
2732 #define MAYBE_FINISH_COMPOSITION()                              \
2733   do {                                                          \
2734     int i;                                                      \
2735     if (composition_state == COMPOSING_NO)                      \
2736       break;                                                    \
2737     /* It is assured that we have enough room for producing     \
2738        characters stored in the table `components'.  */         \
2739     if (charbuf + component_idx > charbuf_end)                  \
2740       goto no_more_source;                                      \
2741     composition_state = COMPOSING_NO;                           \
2742     if (method == COMPOSITION_RELATIVE                          \
2743         || method == COMPOSITION_WITH_ALTCHARS)                 \
2744       {                                                         \
2745         for (i = 0; i < component_idx; i++)                     \
2746           *charbuf++ = components[i];                           \
2747         char_offset += component_idx;                           \
2748       }                                                         \
2749     else                                                        \
2750       {                                                         \
2751         for (i = 0; i < component_idx; i += 2)                  \
2752           *charbuf++ = components[i];                           \
2753         char_offset += (component_idx / 2) + 1;                 \
2754       }                                                         \
2755   } while (0)
2756
2757
2758 /* Handle composition start sequence ESC 0, ESC 2, ESC 3, or ESC 4.
2759    ESC 0 : relative composition : ESC 0 CHAR ... ESC 1
2760    ESC 2 : rulebase composition : ESC 2 CHAR RULE CHAR RULE ... CHAR ESC 1
2761    ESC 3 : altchar composition :  ESC 3 CHAR ... ESC 0 CHAR ... ESC 1
2762    ESC 4 : alt&rule composition : ESC 4 CHAR RULE ... CHAR ESC 0 CHAR ... ESC 1
2763   */
2764
2765 #define DECODE_COMPOSITION_START(c1)                                    \
2766   do {                                                                  \
2767     if (c1 == '0'                                                       \
2768         && composition_state == COMPOSING_COMPONENT_RULE)               \
2769       {                                                                 \
2770         component_len = component_idx;                                  \
2771         composition_state = COMPOSING_CHAR;                             \
2772       }                                                                 \
2773     else                                                                \
2774       {                                                                 \
2775         const unsigned char *p;                                         \
2776                                                                         \
2777         MAYBE_FINISH_COMPOSITION ();                                    \
2778         if (charbuf + MAX_COMPOSITION_COMPONENTS > charbuf_end)         \
2779           goto no_more_source;                                          \
2780         for (p = src; p < src_end - 1; p++)                             \
2781           if (*p == ISO_CODE_ESC && p[1] == '1')                        \
2782             break;                                                      \
2783         if (p == src_end - 1)                                           \
2784           {                                                             \
2785             if (coding->mode & CODING_MODE_LAST_BLOCK)                  \
2786               goto invalid_code;                                        \
2787             goto no_more_source;                                        \
2788           }                                                             \
2789                                                                         \
2790         /* This is surely the start of a composition.  */               \
2791         method = (c1 == '0' ? COMPOSITION_RELATIVE                      \
2792                   : c1 == '2' ? COMPOSITION_WITH_RULE                   \
2793                   : c1 == '3' ? COMPOSITION_WITH_ALTCHARS               \
2794                   : COMPOSITION_WITH_RULE_ALTCHARS);                    \
2795         composition_state = (c1 <= '2' ? COMPOSING_CHAR                 \
2796                              : COMPOSING_COMPONENT_CHAR);               \
2797         component_idx = component_len = 0;                              \
2798       }                                                                 \
2799   } while (0)
2800
2801
2802 /* Handle compositoin end sequence ESC 1.  */
2803
2804 #define DECODE_COMPOSITION_END()                                        \
2805   do {                                                                  \
2806     int nchars = (component_len > 0 ? component_idx - component_len     \
2807                   : method == COMPOSITION_RELATIVE ? component_idx      \
2808                   : (component_idx + 1) / 2);                           \
2809     int i;                                                              \
2810     int *saved_charbuf = charbuf;                                       \
2811     int from = char_offset;                                             \
2812     int to = from + nchars;                                             \
2813                                                                         \
2814     ADD_COMPOSITION_DATA (charbuf, from, to, method);                   \
2815     if (method != COMPOSITION_RELATIVE)                                 \
2816       {                                                                 \
2817         if (component_len == 0)                                         \
2818           for (i = 0; i < component_idx; i++)                           \
2819             *charbuf++ = components[i];                                 \
2820         else                                                            \
2821           for (i = 0; i < component_len; i++)                           \
2822             *charbuf++ = components[i];                                 \
2823         *saved_charbuf = saved_charbuf - charbuf;                       \
2824       }                                                                 \
2825     if (method == COMPOSITION_WITH_RULE)                                \
2826       for (i = 0; i < component_idx; i += 2, char_offset++)             \
2827         *charbuf++ = components[i];                                     \
2828     else                                                                \
2829       for (i = component_len; i < component_idx; i++, char_offset++)    \
2830         *charbuf++ = components[i];                                     \
2831     coding->annotated = 1;                                              \
2832     composition_state = COMPOSING_NO;                                   \
2833   } while (0)
2834
2835
2836 /* Decode a composition rule from the byte C1 (and maybe one more byte
2837    from SRC) and store one encoded composition rule in
2838    coding->cmp_data.  */
2839
2840 #define DECODE_COMPOSITION_RULE(c1)                                     \
2841   do {                                                                  \
2842     (c1) -= 32;                                                         \
2843     if (c1 < 81)                /* old format (before ver.21) */        \
2844       {                                                                 \
2845         int gref = (c1) / 9;                                            \
2846         int nref = (c1) % 9;                                            \
2847         if (gref == 4) gref = 10;                                       \
2848         if (nref == 4) nref = 10;                                       \
2849         c1 = COMPOSITION_ENCODE_RULE (gref, nref);                      \
2850       }                                                                 \
2851     else if (c1 < 93)           /* new format (after ver.21) */         \
2852       {                                                                 \
2853         ONE_MORE_BYTE (c2);                                             \
2854         c1 = COMPOSITION_ENCODE_RULE (c1 - 81, c2 - 32);                \
2855       }                                                                 \
2856     else                                                                \
2857       c1 = 0;                                                           \
2858   } while (0)
2859
2860
2861 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".  */
2862
2863 static void
2864 decode_coding_iso_2022 (coding)
2865      struct coding_system *coding;
2866 {
2867   const unsigned char *src = coding->source + coding->consumed;
2868   const unsigned char *src_end = coding->source + coding->src_bytes;
2869   const unsigned char *src_base;
2870   int *charbuf = coding->charbuf;
2871   int *charbuf_end
2872     = charbuf + coding->charbuf_size - 4 - MAX_ANNOTATION_LENGTH;
2873   int consumed_chars = 0, consumed_chars_base;
2874   int multibytep = coding->src_multibyte;
2875   /* Charsets invoked to graphic plane 0 and 1 respectively.  */
2876   int charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
2877   int charset_id_1 = CODING_ISO_INVOKED_CHARSET (coding, 1);
2878   struct charset *charset;
2879   int c;
2880   /* For handling composition sequence.  */
2881 #define COMPOSING_NO                    0
2882 #define COMPOSING_CHAR                  1
2883 #define COMPOSING_RULE                  2
2884 #define COMPOSING_COMPONENT_CHAR        3
2885 #define COMPOSING_COMPONENT_RULE        4
2886
2887   int composition_state = COMPOSING_NO;
2888   enum composition_method method;
2889   int components[MAX_COMPOSITION_COMPONENTS * 2 + 1];
2890   int component_idx;
2891   int component_len;
2892   Lisp_Object attrs, charset_list;
2893   int char_offset = coding->produced_char;
2894   int last_offset = char_offset;
2895   int last_id = charset_ascii;
2896
2897   CODING_GET_INFO (coding, attrs, charset_list);
2898   setup_iso_safe_charsets (attrs);
2899
2900   while (1)
2901     {
2902       int c1, c2;
2903
2904       src_base = src;
2905       consumed_chars_base = consumed_chars;
2906
2907       if (charbuf >= charbuf_end)
2908         break;
2909
2910       ONE_MORE_BYTE (c1);
2911       if (c1 < 0)
2912         goto invalid_code;
2913
2914       /* We produce at most one character.  */
2915       switch (iso_code_class [c1])
2916         {
2917         case ISO_0x20_or_0x7F:
2918           if (composition_state != COMPOSING_NO)
2919             {
2920               if (composition_state == COMPOSING_RULE
2921                   || composition_state == COMPOSING_COMPONENT_RULE)
2922                 {
2923                   DECODE_COMPOSITION_RULE (c1);
2924                   components[component_idx++] = c1;
2925                   composition_state--;
2926                   continue;
2927                 }
2928             }
2929           if (charset_id_0 < 0
2930               || ! CHARSET_ISO_CHARS_96 (CHARSET_FROM_ID (charset_id_0)))
2931             /* This is SPACE or DEL.  */
2932             charset = CHARSET_FROM_ID (charset_ascii);
2933           else
2934             charset = CHARSET_FROM_ID (charset_id_0);
2935           break;
2936
2937         case ISO_graphic_plane_0:
2938           if (composition_state != COMPOSING_NO)
2939             {
2940               if (composition_state == COMPOSING_RULE
2941                   || composition_state == COMPOSING_COMPONENT_RULE)
2942                 {
2943                   DECODE_COMPOSITION_RULE (c1);
2944                   components[component_idx++] = c1;
2945                   composition_state--;
2946                   continue;
2947                 }
2948             }
2949           charset = CHARSET_FROM_ID (charset_id_0);
2950           break;
2951
2952         case ISO_0xA0_or_0xFF:
2953           if (charset_id_1 < 0
2954               || ! CHARSET_ISO_CHARS_96 (CHARSET_FROM_ID (charset_id_1))
2955               || CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS)
2956             goto invalid_code;
2957           /* This is a graphic character, we fall down ... */
2958
2959         case ISO_graphic_plane_1:
2960           if (charset_id_1 < 0)
2961             goto invalid_code;
2962           charset = CHARSET_FROM_ID (charset_id_1);
2963           break;
2964
2965         case ISO_control_0:
2966           MAYBE_FINISH_COMPOSITION ();
2967           charset = CHARSET_FROM_ID (charset_ascii);
2968           break;
2969
2970         case ISO_control_1:
2971           MAYBE_FINISH_COMPOSITION ();
2972           goto invalid_code;
2973
2974         case ISO_shift_out:
2975           if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LOCKING_SHIFT)
2976               || CODING_ISO_DESIGNATION (coding, 1) < 0)
2977             goto invalid_code;
2978           CODING_ISO_INVOCATION (coding, 0) = 1;
2979           charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
2980           continue;
2981
2982         case ISO_shift_in:
2983           if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LOCKING_SHIFT))
2984             goto invalid_code;
2985           CODING_ISO_INVOCATION (coding, 0) = 0;
2986           charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
2987           continue;
2988
2989         case ISO_single_shift_2_7:
2990         case ISO_single_shift_2:
2991           if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT))
2992             goto invalid_code;
2993           /* SS2 is handled as an escape sequence of ESC 'N' */
2994           c1 = 'N';
2995           goto label_escape_sequence;
2996
2997         case ISO_single_shift_3:
2998           if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT))
2999             goto invalid_code;
3000           /* SS2 is handled as an escape sequence of ESC 'O' */
3001           c1 = 'O';
3002           goto label_escape_sequence;
3003
3004         case ISO_control_sequence_introducer:
3005           /* CSI is handled as an escape sequence of ESC '[' ...  */
3006           c1 = '[';
3007           goto label_escape_sequence;
3008
3009         case ISO_escape:
3010           ONE_MORE_BYTE (c1);
3011         label_escape_sequence:
3012           /* Escape sequences handled here are invocation,
3013              designation, direction specification, and character
3014              composition specification.  */
3015           switch (c1)
3016             {
3017             case '&':           /* revision of following character set */
3018               ONE_MORE_BYTE (c1);
3019               if (!(c1 >= '@' && c1 <= '~'))
3020                 goto invalid_code;
3021               ONE_MORE_BYTE (c1);
3022               if (c1 != ISO_CODE_ESC)
3023                 goto invalid_code;
3024               ONE_MORE_BYTE (c1);
3025               goto label_escape_sequence;
3026
3027             case '$':           /* designation of 2-byte character set */
3028               if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DESIGNATION))
3029                 goto invalid_code;
3030               ONE_MORE_BYTE (c1);
3031               if (c1 >= '@' && c1 <= 'B')
3032                 {       /* designation of JISX0208.1978, GB2312.1980,
3033                            or JISX0208.1980 */
3034                   DECODE_DESIGNATION (0, 2, 0, c1);
3035                 }
3036               else if (c1 >= 0x28 && c1 <= 0x2B)
3037                 {       /* designation of DIMENSION2_CHARS94 character set */
3038                   ONE_MORE_BYTE (c2);
3039                   DECODE_DESIGNATION (c1 - 0x28, 2, 0, c2);
3040                 }
3041               else if (c1 >= 0x2C && c1 <= 0x2F)
3042                 {       /* designation of DIMENSION2_CHARS96 character set */
3043                   ONE_MORE_BYTE (c2);
3044                   DECODE_DESIGNATION (c1 - 0x2C, 2, 1, c2);
3045                 }
3046               else
3047                 goto invalid_code;
3048               /* We must update these variables now.  */
3049               charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
3050               charset_id_1 = CODING_ISO_INVOKED_CHARSET (coding, 1);
3051               continue;
3052
3053             case 'n':           /* invocation of locking-shift-2 */
3054               if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LOCKING_SHIFT)
3055                   || CODING_ISO_DESIGNATION (coding, 2) < 0)
3056                 goto invalid_code;
3057               CODING_ISO_INVOCATION (coding, 0) = 2;
3058               charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
3059               continue;
3060
3061             case 'o':           /* invocation of locking-shift-3 */
3062               if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LOCKING_SHIFT)
3063                   || CODING_ISO_DESIGNATION (coding, 3) < 0)
3064                 goto invalid_code;
3065               CODING_ISO_INVOCATION (coding, 0) = 3;
3066               charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
3067               continue;
3068
3069             case 'N':           /* invocation of single-shift-2 */
3070               if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT)
3071                   || CODING_ISO_DESIGNATION (coding, 2) < 0)
3072                 goto invalid_code;
3073               charset = CHARSET_FROM_ID (CODING_ISO_DESIGNATION (coding, 2));
3074               ONE_MORE_BYTE (c1);
3075               if (c1 < 0x20 || (c1 >= 0x80 && c1 < 0xA0))
3076                 goto invalid_code;
3077               break;
3078
3079             case 'O':           /* invocation of single-shift-3 */
3080               if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT)
3081                   || CODING_ISO_DESIGNATION (coding, 3) < 0)
3082                 goto invalid_code;
3083               charset = CHARSET_FROM_ID (CODING_ISO_DESIGNATION (coding, 3));
3084               ONE_MORE_BYTE (c1);
3085               if (c1 < 0x20 || (c1 >= 0x80 && c1 < 0xA0))
3086                 goto invalid_code;
3087               break;
3088
3089             case '0': case '2': case '3': case '4': /* start composition */
3090               if (! (coding->common_flags & CODING_ANNOTATE_COMPOSITION_MASK))
3091                 goto invalid_code;
3092               DECODE_COMPOSITION_START (c1);
3093               continue;
3094
3095             case '1':           /* end composition */
3096               if (composition_state == COMPOSING_NO)
3097                 goto invalid_code;
3098               DECODE_COMPOSITION_END ();
3099               continue;
3100
3101             case '[':           /* specification of direction */
3102               if (! CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DIRECTION)
3103                 goto invalid_code;
3104               /* For the moment, nested direction is not supported.
3105                  So, `coding->mode & CODING_MODE_DIRECTION' zero means
3106                  left-to-right, and nozero means right-to-left.  */
3107               ONE_MORE_BYTE (c1);
3108               switch (c1)
3109                 {
3110                 case ']':       /* end of the current direction */
3111                   coding->mode &= ~CODING_MODE_DIRECTION;
3112
3113                 case '0':       /* end of the current direction */
3114                 case '1':       /* start of left-to-right direction */
3115                   ONE_MORE_BYTE (c1);
3116                   if (c1 == ']')
3117                     coding->mode &= ~CODING_MODE_DIRECTION;
3118                   else
3119                     goto invalid_code;
3120                   break;
3121
3122                 case '2':       /* start of right-to-left direction */
3123                   ONE_MORE_BYTE (c1);
3124                   if (c1 == ']')
3125                     coding->mode |= CODING_MODE_DIRECTION;
3126                   else
3127                     goto invalid_code;
3128                   break;
3129
3130                 default:
3131                   goto invalid_code;
3132                 }
3133               continue;
3134
3135             case '%':
3136               ONE_MORE_BYTE (c1);
3137               if (c1 == '/')
3138                 {
3139                   /* CTEXT extended segment:
3140                      ESC % / [0-4] M L --ENCODING-NAME-- \002 --BYTES--
3141                      We keep these bytes as is for the moment.
3142                      They may be decoded by post-read-conversion.  */
3143                   int dim, M, L;
3144                   int size;
3145
3146                   ONE_MORE_BYTE (dim);
3147                   ONE_MORE_BYTE (M);
3148                   ONE_MORE_BYTE (L);
3149                   size = ((M - 128) * 128) + (L - 128);
3150                   if (charbuf + 8 + size > charbuf_end)
3151                     goto break_loop;
3152                   *charbuf++ = ISO_CODE_ESC;
3153                   *charbuf++ = '%';
3154                   *charbuf++ = '/';
3155                   *charbuf++ = dim;
3156                   *charbuf++ = BYTE8_TO_CHAR (M);
3157                   *charbuf++ = BYTE8_TO_CHAR (L);
3158                   while (size-- > 0)
3159                     {
3160                       ONE_MORE_BYTE (c1);
3161                       *charbuf++ = ASCII_BYTE_P (c1) ? c1 : BYTE8_TO_CHAR (c1);
3162                     }
3163                 }
3164               else if (c1 == 'G')
3165                 {
3166                   /* XFree86 extension for embedding UTF-8 in CTEXT:
3167                      ESC % G --UTF-8-BYTES-- ESC % @
3168                      We keep these bytes as is for the moment.
3169                      They may be decoded by post-read-conversion.  */
3170                   int *p = charbuf;
3171
3172                   if (p + 6 > charbuf_end)
3173                     goto break_loop;
3174                   *p++ = ISO_CODE_ESC;
3175                   *p++ = '%';
3176                   *p++ = 'G';
3177                   while (p < charbuf_end)
3178                     {
3179                       ONE_MORE_BYTE (c1);
3180                       if (c1 == ISO_CODE_ESC
3181                           && src + 1 < src_end
3182                           && src[0] == '%'
3183                           && src[1] == '@')
3184                         break;
3185                       *p++ = ASCII_BYTE_P (c1) ? c1 : BYTE8_TO_CHAR (c1);
3186                     }
3187                   if (p + 3 > charbuf_end)
3188                     goto break_loop;
3189                   *p++ = ISO_CODE_ESC;
3190                   *p++ = '%';
3191                   *p++ = '@';
3192                   charbuf = p;
3193                 }
3194               else
3195                 goto invalid_code;
3196               continue;
3197               break;
3198
3199             default:
3200               if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DESIGNATION))
3201                 goto invalid_code;
3202               if (c1 >= 0x28 && c1 <= 0x2B)
3203                 {       /* designation of DIMENSION1_CHARS94 character set */
3204                   ONE_MORE_BYTE (c2);
3205                   DECODE_DESIGNATION (c1 - 0x28, 1, 0, c2);
3206                 }
3207               else if (c1 >= 0x2C && c1 <= 0x2F)
3208                 {       /* designation of DIMENSION1_CHARS96 character set */
3209                   ONE_MORE_BYTE (c2);
3210                   DECODE_DESIGNATION (c1 - 0x2C, 1, 1, c2);
3211                 }
3212               else
3213                 goto invalid_code;
3214               /* We must update these variables now.  */
3215               charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
3216               charset_id_1 = CODING_ISO_INVOKED_CHARSET (coding, 1);
3217               continue;
3218             }
3219         }
3220
3221       if (charset->id != charset_ascii
3222           && last_id != charset->id)
3223         {
3224           if (last_id != charset_ascii)
3225             ADD_CHARSET_DATA (charbuf, last_offset, char_offset, last_id);
3226           last_id = charset->id;
3227           last_offset = char_offset;
3228         }
3229
3230       /* Now we know CHARSET and 1st position code C1 of a character.
3231          Produce a decoded character while getting 2nd position code
3232          C2 if necessary.  */
3233       c1 &= 0x7F;
3234       if (CHARSET_DIMENSION (charset) > 1)
3235         {
3236           ONE_MORE_BYTE (c2);
3237           if (c2 < 0x20 || (c2 >= 0x80 && c2 < 0xA0))
3238             /* C2 is not in a valid range.  */
3239             goto invalid_code;
3240           c1 = (c1 << 8) | (c2 & 0x7F);
3241           if (CHARSET_DIMENSION (charset) > 2)
3242             {
3243               ONE_MORE_BYTE (c2);
3244               if (c2 < 0x20 || (c2 >= 0x80 && c2 < 0xA0))
3245                 /* C2 is not in a valid range.  */
3246                 goto invalid_code;
3247               c1 = (c1 << 8) | (c2 & 0x7F);
3248             }
3249         }
3250
3251       CODING_DECODE_CHAR (coding, src, src_base, src_end, charset, c1, c);
3252       if (c < 0)
3253         {
3254           MAYBE_FINISH_COMPOSITION ();
3255           for (; src_base < src; src_base++, char_offset++)
3256             {
3257               if (ASCII_BYTE_P (*src_base))
3258                 *charbuf++ = *src_base;
3259               else
3260                 *charbuf++ = BYTE8_TO_CHAR (*src_base);
3261             }
3262         }
3263       else if (composition_state == COMPOSING_NO)
3264         {
3265           *charbuf++ = c;
3266           char_offset++;
3267         }
3268       else
3269         {
3270           components[component_idx++] = c;
3271           if (method == COMPOSITION_WITH_RULE
3272               || (method == COMPOSITION_WITH_RULE_ALTCHARS
3273                   && composition_state == COMPOSING_COMPONENT_CHAR))
3274             composition_state++;
3275         }
3276       continue;
3277
3278     invalid_code:
3279       MAYBE_FINISH_COMPOSITION ();
3280       src = src_base;
3281       consumed_chars = consumed_chars_base;
3282       ONE_MORE_BYTE (c);
3283       *charbuf++ = c < 0 ? -c : ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
3284       char_offset++;
3285       coding->errors++;
3286       continue;
3287
3288     break_loop:
3289       break;
3290     }
3291
3292  no_more_source:
3293   if (last_id != charset_ascii)
3294     ADD_CHARSET_DATA (charbuf, last_offset, char_offset, last_id);
3295   coding->consumed_char += consumed_chars_base;
3296   coding->consumed = src_base - coding->source;
3297   coding->charbuf_used = charbuf - coding->charbuf;
3298 }
3299
3300
3301 /* ISO2022 encoding stuff.  */
3302
3303 /*
3304    It is not enough to say just "ISO2022" on encoding, we have to
3305    specify more details.  In Emacs, each coding system of ISO2022
3306    variant has the following specifications:
3307         1. Initial designation to G0 thru G3.
3308         2. Allows short-form designation?
3309         3. ASCII should be designated to G0 before control characters?
3310         4. ASCII should be designated to G0 at end of line?
3311         5. 7-bit environment or 8-bit environment?
3312         6. Use locking-shift?
3313         7. Use Single-shift?
3314    And the following two are only for Japanese:
3315         8. Use ASCII in place of JIS0201-1976-Roman?
3316         9. Use JISX0208-1983 in place of JISX0208-1978?
3317    These specifications are encoded in CODING_ISO_FLAGS (coding) as flag bits
3318    defined by macros CODING_ISO_FLAG_XXX.  See `coding.h' for more
3319    details.
3320 */
3321
3322 /* Produce codes (escape sequence) for designating CHARSET to graphic
3323    register REG at DST, and increment DST.  If <final-char> of CHARSET is
3324    '@', 'A', or 'B' and the coding system CODING allows, produce
3325    designation sequence of short-form.  */
3326
3327 #define ENCODE_DESIGNATION(charset, reg, coding)                        \
3328   do {                                                                  \
3329     unsigned char final_char = CHARSET_ISO_FINAL (charset);             \
3330     char *intermediate_char_94 = "()*+";                                \
3331     char *intermediate_char_96 = ",-./";                                \
3332     int revision = -1;                                                  \
3333     int c;                                                              \
3334                                                                         \
3335     if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_REVISION)           \
3336       revision = CHARSET_ISO_REVISION (charset);                        \
3337                                                                         \
3338     if (revision >= 0)                                                  \
3339       {                                                                 \
3340         EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, '&');                       \
3341         EMIT_ONE_BYTE ('@' + revision);                                 \
3342       }                                                                 \
3343     EMIT_ONE_ASCII_BYTE (ISO_CODE_ESC);                                 \
3344     if (CHARSET_DIMENSION (charset) == 1)                               \
3345       {                                                                 \
3346         if (! CHARSET_ISO_CHARS_96 (charset))                           \
3347           c = intermediate_char_94[reg];                                \
3348         else                                                            \
3349           c = intermediate_char_96[reg];                                \
3350         EMIT_ONE_ASCII_BYTE (c);                                        \
3351       }                                                                 \
3352     else                                                                \
3353       {                                                                 \
3354         EMIT_ONE_ASCII_BYTE ('$');                                      \
3355         if (! CHARSET_ISO_CHARS_96 (charset))                           \
3356           {                                                             \
3357             if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LONG_FORM   \
3358                 || reg != 0                                             \
3359                 || final_char < '@' || final_char > 'B')                \
3360               EMIT_ONE_ASCII_BYTE (intermediate_char_94[reg]);          \
3361           }                                                             \
3362         else                                                            \
3363           EMIT_ONE_ASCII_BYTE (intermediate_char_96[reg]);              \
3364       }                                                                 \
3365     EMIT_ONE_ASCII_BYTE (final_char);                                   \
3366                                                                         \
3367     CODING_ISO_DESIGNATION (coding, reg) = CHARSET_ID (charset);        \
3368   } while (0)
3369
3370
3371 /* The following two macros produce codes (control character or escape
3372    sequence) for ISO2022 single-shift functions (single-shift-2 and
3373    single-shift-3).  */
3374
3375 #define ENCODE_SINGLE_SHIFT_2                                           \
3376   do {                                                                  \
3377     if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS)         \
3378       EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'N');                         \
3379     else                                                                \
3380       EMIT_ONE_BYTE (ISO_CODE_SS2);                                     \
3381     CODING_ISO_SINGLE_SHIFTING (coding) = 1;                            \
3382   } while (0)
3383
3384
3385 #define ENCODE_SINGLE_SHIFT_3                                           \
3386   do {                                                                  \
3387     if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS)         \
3388       EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'O');                         \
3389     else                                                                \
3390       EMIT_ONE_BYTE (ISO_CODE_SS3);                                     \
3391     CODING_ISO_SINGLE_SHIFTING (coding) = 1;                            \
3392   } while (0)
3393
3394
3395 /* The following four macros produce codes (control character or
3396    escape sequence) for ISO2022 locking-shift functions (shift-in,
3397    shift-out, locking-shift-2, and locking-shift-3).  */
3398
3399 #define ENCODE_SHIFT_IN                                 \
3400   do {                                                  \
3401     EMIT_ONE_ASCII_BYTE (ISO_CODE_SI);                  \
3402     CODING_ISO_INVOCATION (coding, 0) = 0;              \
3403   } while (0)
3404
3405
3406 #define ENCODE_SHIFT_OUT                                \
3407   do {                                                  \
3408     EMIT_ONE_ASCII_BYTE (ISO_CODE_SO);                  \
3409     CODING_ISO_INVOCATION (coding, 0) = 1;              \
3410   } while (0)
3411
3412
3413 #define ENCODE_LOCKING_SHIFT_2                          \
3414   do {                                                  \
3415     EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'n');           \
3416     CODING_ISO_INVOCATION (coding, 0) = 2;              \
3417   } while (0)
3418
3419
3420 #define ENCODE_LOCKING_SHIFT_3                          \
3421   do {                                                  \
3422     EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'n');           \
3423     CODING_ISO_INVOCATION (coding, 0) = 3;              \
3424   } while (0)
3425
3426
3427 /* Produce codes for a DIMENSION1 character whose character set is
3428    CHARSET and whose position-code is C1.  Designation and invocation
3429    sequences are also produced in advance if necessary.  */
3430
3431 #define ENCODE_ISO_CHARACTER_DIMENSION1(charset, c1)                    \
3432   do {                                                                  \
3433     int id = CHARSET_ID (charset);                                      \
3434                                                                         \
3435     if ((CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_USE_ROMAN)         \
3436         && id == charset_ascii)                                         \
3437       {                                                                 \
3438         id = charset_jisx0201_roman;                                    \
3439         charset = CHARSET_FROM_ID (id);                                 \
3440       }                                                                 \
3441                                                                         \
3442     if (CODING_ISO_SINGLE_SHIFTING (coding))                            \
3443       {                                                                 \
3444         if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS)     \
3445           EMIT_ONE_ASCII_BYTE (c1 & 0x7F);                              \
3446         else                                                            \
3447           EMIT_ONE_BYTE (c1 | 0x80);                                    \
3448         CODING_ISO_SINGLE_SHIFTING (coding) = 0;                        \
3449         break;                                                          \
3450       }                                                                 \
3451     else if (id == CODING_ISO_INVOKED_CHARSET (coding, 0))              \
3452       {                                                                 \
3453         EMIT_ONE_ASCII_BYTE (c1 & 0x7F);                                \
3454         break;                                                          \
3455       }                                                                 \
3456     else if (id == CODING_ISO_INVOKED_CHARSET (coding, 1))              \
3457       {                                                                 \
3458         EMIT_ONE_BYTE (c1 | 0x80);                                      \
3459         break;                                                          \
3460       }                                                                 \
3461     else                                                                \
3462       /* Since CHARSET is not yet invoked to any graphic planes, we     \
3463          must invoke it, or, at first, designate it to some graphic     \
3464          register.  Then repeat the loop to actually produce the        \
3465          character.  */                                                 \
3466       dst = encode_invocation_designation (charset, coding, dst,        \
3467                                            &produced_chars);            \
3468   } while (1)
3469
3470
3471 /* Produce codes for a DIMENSION2 character whose character set is
3472    CHARSET and whose position-codes are C1 and C2.  Designation and
3473    invocation codes are also produced in advance if necessary.  */
3474
3475 #define ENCODE_ISO_CHARACTER_DIMENSION2(charset, c1, c2)                \
3476   do {                                                                  \
3477     int id = CHARSET_ID (charset);                                      \
3478                                                                         \
3479     if ((CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_USE_OLDJIS)        \
3480         && id == charset_jisx0208)                                      \
3481       {                                                                 \
3482         id = charset_jisx0208_1978;                                     \
3483         charset = CHARSET_FROM_ID (id);                                 \
3484       }                                                                 \
3485                                                                         \
3486     if (CODING_ISO_SINGLE_SHIFTING (coding))                            \
3487       {                                                                 \
3488         if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS)     \
3489           EMIT_TWO_ASCII_BYTES ((c1) & 0x7F, (c2) & 0x7F);              \
3490         else                                                            \
3491           EMIT_TWO_BYTES ((c1) | 0x80, (c2) | 0x80);                    \
3492         CODING_ISO_SINGLE_SHIFTING (coding) = 0;                        \
3493         break;                                                          \
3494       }                                                                 \
3495     else if (id == CODING_ISO_INVOKED_CHARSET (coding, 0))              \
3496       {                                                                 \
3497         EMIT_TWO_ASCII_BYTES ((c1) & 0x7F, (c2) & 0x7F);                \
3498         break;                                                          \
3499       }                                                                 \
3500     else if (id == CODING_ISO_INVOKED_CHARSET (coding, 1))              \
3501       {                                                                 \
3502         EMIT_TWO_BYTES ((c1) | 0x80, (c2) | 0x80);                      \
3503         break;                                                          \
3504       }                                                                 \
3505     else                                                                \
3506       /* Since CHARSET is not yet invoked to any graphic planes, we     \
3507          must invoke it, or, at first, designate it to some graphic     \
3508          register.  Then repeat the loop to actually produce the        \
3509          character.  */                                                 \
3510       dst = encode_invocation_designation (charset, coding, dst,        \
3511                                            &produced_chars);            \
3512   } while (1)
3513
3514
3515 #define ENCODE_ISO_CHARACTER(charset, c)                                   \
3516   do {                                                                     \
3517     int code = ENCODE_CHAR ((charset),(c));                                \
3518                                                                            \
3519     if (CHARSET_DIMENSION (charset) == 1)                                  \
3520       ENCODE_ISO_CHARACTER_DIMENSION1 ((charset), code);                   \
3521     else                                                                   \
3522       ENCODE_ISO_CHARACTER_DIMENSION2 ((charset), code >> 8, code & 0xFF); \
3523   } while (0)
3524
3525
3526 /* Produce designation and invocation codes at a place pointed by DST
3527    to use CHARSET.  The element `spec.iso_2022' of *CODING is updated.
3528    Return new DST.  */
3529
3530 unsigned char *
3531 encode_invocation_designation (charset, coding, dst, p_nchars)
3532      struct charset *charset;
3533      struct coding_system *coding;
3534      unsigned char *dst;
3535      int *p_nchars;
3536 {
3537   int multibytep = coding->dst_multibyte;
3538   int produced_chars = *p_nchars;
3539   int reg;                      /* graphic register number */
3540   int id = CHARSET_ID (charset);
3541
3542   /* At first, check designations.  */
3543   for (reg = 0; reg < 4; reg++)
3544     if (id == CODING_ISO_DESIGNATION (coding, reg))
3545       break;
3546
3547   if (reg >= 4)
3548     {
3549       /* CHARSET is not yet designated to any graphic registers.  */
3550       /* At first check the requested designation.  */
3551       reg = CODING_ISO_REQUEST (coding, id);
3552       if (reg < 0)
3553         /* Since CHARSET requests no special designation, designate it
3554            to graphic register 0.  */
3555         reg = 0;
3556
3557       ENCODE_DESIGNATION (charset, reg, coding);
3558     }
3559
3560   if (CODING_ISO_INVOCATION (coding, 0) != reg
3561       && CODING_ISO_INVOCATION (coding, 1) != reg)
3562     {
3563       /* Since the graphic register REG is not invoked to any graphic
3564          planes, invoke it to graphic plane 0.  */
3565       switch (reg)
3566         {
3567         case 0:                 /* graphic register 0 */
3568           ENCODE_SHIFT_IN;
3569           break;
3570
3571         case 1:                 /* graphic register 1 */
3572           ENCODE_SHIFT_OUT;
3573           break;
3574
3575         case 2:                 /* graphic register 2 */
3576           if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT)
3577             ENCODE_SINGLE_SHIFT_2;
3578           else
3579             ENCODE_LOCKING_SHIFT_2;
3580           break;
3581
3582         case 3:                 /* graphic register 3 */
3583           if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT)
3584             ENCODE_SINGLE_SHIFT_3;
3585           else
3586             ENCODE_LOCKING_SHIFT_3;
3587           break;
3588         }
3589     }
3590
3591   *p_nchars = produced_chars;
3592   return dst;
3593 }
3594
3595 /* The following three macros produce codes for indicating direction
3596    of text.  */
3597 #define ENCODE_CONTROL_SEQUENCE_INTRODUCER                              \
3598   do {                                                                  \
3599     if (CODING_ISO_FLAGS (coding) == CODING_ISO_FLAG_SEVEN_BITS)        \
3600       EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, '[');                         \
3601     else                                                                \
3602       EMIT_ONE_BYTE (ISO_CODE_CSI);                                     \
3603   } while (0)
3604
3605
3606 #define ENCODE_DIRECTION_R2L()                  \
3607   do {                                          \
3608     ENCODE_CONTROL_SEQUENCE_INTRODUCER (dst);   \
3609     EMIT_TWO_ASCII_BYTES ('2', ']');            \
3610   } while (0)
3611
3612
3613 #define ENCODE_DIRECTION_L2R()                  \
3614   do {                                          \
3615     ENCODE_CONTROL_SEQUENCE_INTRODUCER (dst);   \
3616     EMIT_TWO_ASCII_BYTES ('0', ']');            \
3617   } while (0)
3618
3619
3620 /* Produce codes for designation and invocation to reset the graphic
3621    planes and registers to initial state.  */
3622 #define ENCODE_RESET_PLANE_AND_REGISTER()                               \
3623   do {                                                                  \
3624     int reg;                                                            \
3625     struct charset *charset;                                            \
3626                                                                         \
3627     if (CODING_ISO_INVOCATION (coding, 0) != 0)                         \
3628       ENCODE_SHIFT_IN;                                                  \
3629     for (reg = 0; reg < 4; reg++)                                       \
3630       if (CODING_ISO_INITIAL (coding, reg) >= 0                         \
3631           && (CODING_ISO_DESIGNATION (coding, reg)                      \
3632               != CODING_ISO_INITIAL (coding, reg)))                     \
3633         {                                                               \
3634           charset = CHARSET_FROM_ID (CODING_ISO_INITIAL (coding, reg)); \
3635           ENCODE_DESIGNATION (charset, reg, coding);                    \
3636         }                                                               \
3637   } while (0)
3638
3639
3640 /* Produce designation sequences of charsets in the line started from
3641    SRC to a place pointed by DST, and return updated DST.
3642
3643    If the current block ends before any end-of-line, we may fail to
3644    find all the necessary designations.  */
3645
3646 static unsigned char *
3647 encode_designation_at_bol (coding, charbuf, charbuf_end, dst)
3648      struct coding_system *coding;
3649      int *charbuf, *charbuf_end;
3650      unsigned char *dst;
3651 {
3652   struct charset *charset;
3653   /* Table of charsets to be designated to each graphic register.  */
3654   int r[4];
3655   int c, found = 0, reg;
3656   int produced_chars = 0;
3657   int multibytep = coding->dst_multibyte;
3658   Lisp_Object attrs;
3659   Lisp_Object charset_list;
3660
3661   attrs = CODING_ID_ATTRS (coding->id);
3662   charset_list = CODING_ATTR_CHARSET_LIST (attrs);
3663   if (EQ (charset_list, Qiso_2022))
3664     charset_list = Viso_2022_charset_list;
3665
3666   for (reg = 0; reg < 4; reg++)
3667     r[reg] = -1;
3668
3669   while (found < 4)
3670     {
3671       int id;
3672
3673       c = *charbuf++;
3674       if (c == '\n')
3675         break;
3676       charset = char_charset (c, charset_list, NULL);
3677       id = CHARSET_ID (charset);
3678       reg = CODING_ISO_REQUEST (coding, id);
3679       if (reg >= 0 && r[reg] < 0)
3680         {
3681           found++;
3682           r[reg] = id;
3683         }
3684     }
3685
3686   if (found)
3687     {
3688       for (reg = 0; reg < 4; reg++)
3689         if (r[reg] >= 0
3690             && CODING_ISO_DESIGNATION (coding, reg) != r[reg])
3691           ENCODE_DESIGNATION (CHARSET_FROM_ID (r[reg]), reg, coding);
3692     }
3693
3694   return dst;
3695 }
3696
3697 /* See the above "GENERAL NOTES on `encode_coding_XXX ()' functions".  */
3698
3699 static int
3700 encode_coding_iso_2022 (coding)
3701      struct coding_system *coding;
3702 {
3703   int multibytep = coding->dst_multibyte;
3704   int *charbuf = coding->charbuf;
3705   int *charbuf_end = charbuf + coding->charbuf_used;
3706   unsigned char *dst = coding->destination + coding->produced;
3707   unsigned char *dst_end = coding->destination + coding->dst_bytes;
3708   int safe_room = 16;
3709   int bol_designation
3710     = (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DESIGNATE_AT_BOL
3711        && CODING_ISO_BOL (coding));
3712   int produced_chars = 0;
3713   Lisp_Object attrs, eol_type, charset_list;
3714   int ascii_compatible;
3715   int c;
3716   int preferred_charset_id = -1;
3717
3718   CODING_GET_INFO (coding, attrs, charset_list);
3719   eol_type = CODING_ID_EOL_TYPE (coding->id);
3720   if (VECTORP (eol_type))
3721     eol_type = Qunix;
3722
3723   setup_iso_safe_charsets (attrs);
3724   /* Charset list may have been changed.  */
3725   charset_list = CODING_ATTR_CHARSET_LIST (attrs);              \
3726   coding->safe_charsets = (char *) SDATA (CODING_ATTR_SAFE_CHARSETS(attrs));
3727
3728   ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
3729
3730   while (charbuf < charbuf_end)
3731     {
3732       ASSURE_DESTINATION (safe_room);
3733
3734       if (bol_designation)
3735         {
3736           unsigned char *dst_prev = dst;
3737
3738           /* We have to produce designation sequences if any now.  */
3739           dst = encode_designation_at_bol (coding, charbuf, charbuf_end, dst);
3740           bol_designation = 0;
3741           /* We are sure that designation sequences are all ASCII bytes.  */
3742           produced_chars += dst - dst_prev;
3743         }
3744
3745       c = *charbuf++;
3746
3747       if (c < 0)
3748         {
3749           /* Handle an annotation.  */
3750           switch (*charbuf)
3751             {
3752             case CODING_ANNOTATE_COMPOSITION_MASK:
3753               /* Not yet implemented.  */
3754               break;
3755             case CODING_ANNOTATE_CHARSET_MASK:
3756               preferred_charset_id = charbuf[3];
3757               if (preferred_charset_id >= 0
3758                   && NILP (Fmemq (make_number (preferred_charset_id),
3759                                   charset_list)))
3760                 preferred_charset_id = -1;
3761               break;
3762             default:
3763               abort ();
3764             }
3765           charbuf += -c - 1;
3766           continue;
3767         }
3768
3769       /* Now encode the character C.  */
3770       if (c < 0x20 || c == 0x7F)
3771         {
3772           if (c == '\n'
3773               || (c == '\r' && EQ (eol_type, Qmac)))
3774             {
3775               if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_RESET_AT_EOL)
3776                 ENCODE_RESET_PLANE_AND_REGISTER ();
3777               if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_INIT_AT_BOL)
3778                 {
3779                   int i;
3780
3781                   for (i = 0; i < 4; i++)
3782                     CODING_ISO_DESIGNATION (coding, i)
3783                       = CODING_ISO_INITIAL (coding, i);
3784                 }
3785               bol_designation
3786                 = CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DESIGNATE_AT_BOL;
3787             }
3788           else if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_RESET_AT_CNTL)
3789             ENCODE_RESET_PLANE_AND_REGISTER ();
3790           EMIT_ONE_ASCII_BYTE (c);
3791         }
3792       else if (ASCII_CHAR_P (c))
3793         {
3794           if (ascii_compatible)
3795             EMIT_ONE_ASCII_BYTE (c);
3796           else
3797             {
3798               struct charset *charset = CHARSET_FROM_ID (charset_ascii);
3799               ENCODE_ISO_CHARACTER (charset, c);
3800             }
3801         }
3802       else if (CHAR_BYTE8_P (c))
3803         {
3804           c = CHAR_TO_BYTE8 (c);
3805           EMIT_ONE_BYTE (c);
3806         }
3807       else
3808         {
3809           struct charset *charset;
3810
3811           if (preferred_charset_id >= 0)
3812             {
3813               charset = CHARSET_FROM_ID (preferred_charset_id);
3814               if (! CHAR_CHARSET_P (c, charset))
3815                 charset = char_charset (c, charset_list, NULL);
3816             }
3817           else
3818             charset = char_charset (c, charset_list, NULL);
3819           if (!charset)
3820             {
3821               if (coding->mode & CODING_MODE_SAFE_ENCODING)
3822                 {
3823                   c = CODING_INHIBIT_CHARACTER_SUBSTITUTION;
3824                   charset = CHARSET_FROM_ID (charset_ascii);
3825                 }
3826               else
3827                 {
3828                   c = coding->default_char;
3829                   charset = char_charset (c, charset_list, NULL);
3830                 }
3831             }
3832           ENCODE_ISO_CHARACTER (charset, c);
3833         }
3834     }
3835
3836   if (coding->mode & CODING_MODE_LAST_BLOCK
3837       && CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_RESET_AT_EOL)
3838     {
3839       ASSURE_DESTINATION (safe_room);
3840       ENCODE_RESET_PLANE_AND_REGISTER ();
3841     }
3842   record_conversion_result (coding, CODING_RESULT_SUCCESS);
3843   CODING_ISO_BOL (coding) = bol_designation;
3844   coding->produced_char += produced_chars;
3845   coding->produced = dst - coding->destination;
3846   return 0;
3847 }
3848
3849 \f
3850 /*** 8,9. SJIS and BIG5 handlers ***/
3851
3852 /* Although SJIS and BIG5 are not ISO's coding system, they are used
3853    quite widely.  So, for the moment, Emacs supports them in the bare
3854    C code.  But, in the future, they may be supported only by CCL.  */
3855
3856 /* SJIS is a coding system encoding three character sets: ASCII, right
3857    half of JISX0201-Kana, and JISX0208.  An ASCII character is encoded
3858    as is.  A character of charset katakana-jisx0201 is encoded by
3859    "position-code + 0x80".  A character of charset japanese-jisx0208
3860    is encoded in 2-byte but two position-codes are divided and shifted
3861    so that it fit in the range below.
3862
3863    --- CODE RANGE of SJIS ---
3864    (character set)      (range)
3865    ASCII                0x00 .. 0x7F
3866    KATAKANA-JISX0201    0xA0 .. 0xDF
3867    JISX0208 (1st byte)  0x81 .. 0x9F and 0xE0 .. 0xEF
3868             (2nd byte)  0x40 .. 0x7E and 0x80 .. 0xFC
3869    -------------------------------
3870
3871 */
3872
3873 /* BIG5 is a coding system encoding two character sets: ASCII and
3874    Big5.  An ASCII character is encoded as is.  Big5 is a two-byte
3875    character set and is encoded in two-byte.
3876
3877    --- CODE RANGE of BIG5 ---
3878    (character set)      (range)
3879    ASCII                0x00 .. 0x7F
3880    Big5 (1st byte)      0xA1 .. 0xFE
3881         (2nd byte)      0x40 .. 0x7E and 0xA1 .. 0xFE
3882    --------------------------
3883
3884   */
3885
3886 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
3887    Check if a text is encoded in SJIS.  If it is, return
3888    CATEGORY_MASK_SJIS, else return 0.  */
3889
3890 static int
3891 detect_coding_sjis (coding, detect_info)
3892      struct coding_system *coding;
3893      struct coding_detection_info *detect_info;
3894 {
3895   const unsigned char *src = coding->source, *src_base;
3896   const unsigned char *src_end = coding->source + coding->src_bytes;
3897   int multibytep = coding->src_multibyte;
3898   int consumed_chars = 0;
3899   int found = 0;
3900   int c;
3901
3902   detect_info->checked |= CATEGORY_MASK_SJIS;
3903   /* A coding system of this category is always ASCII compatible.  */
3904   src += coding->head_ascii;
3905
3906   while (1)
3907     {
3908       src_base = src;
3909       ONE_MORE_BYTE (c);
3910       if (c < 0x80)
3911         continue;
3912       if ((c >= 0x81 && c <= 0x9F) || (c >= 0xE0 && c <= 0xEF))
3913         {
3914           ONE_MORE_BYTE (c);
3915           if (c < 0x40 || c == 0x7F || c > 0xFC)
3916             break;
3917           found = CATEGORY_MASK_SJIS;
3918         }
3919       else if (c >= 0xA0 && c < 0xE0)
3920         found = CATEGORY_MASK_SJIS;
3921       else
3922         break;
3923     }
3924   detect_info->rejected |= CATEGORY_MASK_SJIS;
3925   return 0;
3926
3927  no_more_source:
3928   if (src_base < src && coding->mode & CODING_MODE_LAST_BLOCK)
3929     {
3930       detect_info->rejected |= CATEGORY_MASK_SJIS;
3931       return 0;
3932     }
3933   detect_info->found |= found;
3934   return 1;
3935 }
3936
3937 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
3938    Check if a text is encoded in BIG5.  If it is, return
3939    CATEGORY_MASK_BIG5, else return 0.  */
3940
3941 static int
3942 detect_coding_big5 (coding, detect_info)
3943      struct coding_system *coding;
3944      struct coding_detection_info *detect_info;
3945 {
3946   const unsigned char *src = coding->source, *src_base;
3947   const unsigned char *src_end = coding->source + coding->src_bytes;
3948   int multibytep = coding->src_multibyte;
3949   int consumed_chars = 0;
3950   int found = 0;
3951   int c;
3952
3953   detect_info->checked |= CATEGORY_MASK_BIG5;
3954   /* A coding system of this category is always ASCII compatible.  */
3955   src += coding->head_ascii;
3956
3957   while (1)
3958     {
3959       src_base = src;
3960       ONE_MORE_BYTE (c);
3961       if (c < 0x80)
3962         continue;
3963       if (c >= 0xA1)
3964         {
3965           ONE_MORE_BYTE (c);
3966           if (c < 0x40 || (c >= 0x7F && c <= 0xA0))
3967             return 0;
3968           found = CATEGORY_MASK_BIG5;
3969         }
3970       else
3971         break;
3972     }
3973   detect_info->rejected |= CATEGORY_MASK_BIG5;
3974   return 0;
3975
3976  no_more_source:
3977   if (src_base < src && coding->mode & CODING_MODE_LAST_BLOCK)
3978     {
3979       detect_info->rejected |= CATEGORY_MASK_BIG5;
3980       return 0;
3981     }
3982   detect_info->found |= found;
3983   return 1;
3984 }
3985
3986 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".
3987    If SJIS_P is 1, decode SJIS text, else decode BIG5 test.  */
3988
3989 static void
3990 decode_coding_sjis (coding)
3991      struct coding_system *coding;
3992 {
3993   const unsigned char *src = coding->source + coding->consumed;
3994   const unsigned char *src_end = coding->source + coding->src_bytes;
3995   const unsigned char *src_base;
3996   int *charbuf = coding->charbuf;
3997   int *charbuf_end = charbuf + coding->charbuf_size - MAX_ANNOTATION_LENGTH;
3998   int consumed_chars = 0, consumed_chars_base;
3999   int multibytep = coding->src_multibyte;
4000   struct charset *charset_roman, *charset_kanji, *charset_kana;
4001   struct charset *charset_kanji2;
4002   Lisp_Object attrs, charset_list, val;
4003   int char_offset = coding->produced_char;
4004   int last_offset = char_offset;
4005   int last_id = charset_ascii;
4006
4007   CODING_GET_INFO (coding, attrs, charset_list);
4008
4009   val = charset_list;
4010   charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
4011   charset_kana = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
4012   charset_kanji = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
4013   charset_kanji2 = NILP (val) ? NULL : CHARSET_FROM_ID (XINT (XCAR (val)));
4014
4015   while (1)
4016     {
4017       int c, c1;
4018       struct charset *charset;
4019
4020       src_base = src;
4021       consumed_chars_base = consumed_chars;
4022
4023       if (charbuf >= charbuf_end)
4024         break;
4025
4026       ONE_MORE_BYTE (c);
4027       if (c < 0)
4028         goto invalid_code;
4029       if (c < 0x80)
4030         charset = charset_roman;
4031       else if (c == 0x80 || c == 0xA0)
4032         goto invalid_code;
4033       else if (c >= 0xA1 && c <= 0xDF)
4034         {
4035           /* SJIS -> JISX0201-Kana */
4036           c &= 0x7F;
4037           charset = charset_kana;
4038         }
4039       else if (c <= 0xEF)
4040         {
4041           /* SJIS -> JISX0208 */
4042           ONE_MORE_BYTE (c1);
4043           if (c1 < 0x40 || c1 == 0x7F || c1 > 0xFC)
4044             goto invalid_code;
4045           c = (c << 8) | c1;
4046           SJIS_TO_JIS (c);
4047           charset = charset_kanji;
4048         }
4049       else if (c <= 0xFC && charset_kanji2)
4050         {
4051           /* SJIS -> JISX0212 */
4052           ONE_MORE_BYTE (c1);
4053           if (c1 < 0x40 || c1 == 0x7F || c1 > 0xFC)
4054             goto invalid_code;
4055           c = (c << 8) | c1;
4056           SJIS_TO_JIS2 (c);
4057           charset = charset_kanji2;
4058         }
4059       else
4060         goto invalid_code;
4061       if (charset->id != charset_ascii
4062           && last_id != charset->id)
4063         {
4064           if (last_id != charset_ascii)
4065             ADD_CHARSET_DATA (charbuf, last_offset, char_offset, last_id);
4066           last_id = charset->id;
4067           last_offset = char_offset;
4068         }
4069       CODING_DECODE_CHAR (coding, src, src_base, src_end, charset, c, c);
4070       *charbuf++ = c;
4071       char_offset++;
4072       continue;
4073
4074     invalid_code:
4075       src = src_base;
4076       consumed_chars = consumed_chars_base;
4077       ONE_MORE_BYTE (c);
4078       *charbuf++ = c < 0 ? -c : BYTE8_TO_CHAR (c);
4079       char_offset++;
4080       coding->errors++;
4081     }
4082
4083  no_more_source:
4084   if (last_id != charset_ascii)
4085     ADD_CHARSET_DATA (charbuf, last_offset, char_offset, last_id);
4086   coding->consumed_char += consumed_chars_base;
4087   coding->consumed = src_base - coding->source;
4088   coding->charbuf_used = charbuf - coding->charbuf;
4089 }
4090
4091 static void
4092 decode_coding_big5 (coding)
4093      struct coding_system *coding;
4094 {
4095   const unsigned char *src = coding->source + coding->consumed;
4096   const unsigned char *src_end = coding->source + coding->src_bytes;
4097   const unsigned char *src_base;
4098   int *charbuf = coding->charbuf;
4099   int *charbuf_end = charbuf + coding->charbuf_size - MAX_ANNOTATION_LENGTH;
4100   int consumed_chars = 0, consumed_chars_base;
4101   int multibytep = coding->src_multibyte;
4102   struct charset *charset_roman, *charset_big5;
4103   Lisp_Object attrs, charset_list, val;
4104   int char_offset = coding->produced_char;
4105   int last_offset = char_offset;
4106   int last_id = charset_ascii;
4107
4108   CODING_GET_INFO (coding, attrs, charset_list);
4109   val = charset_list;
4110   charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
4111   charset_big5 = CHARSET_FROM_ID (XINT (XCAR (val)));
4112
4113   while (1)
4114     {
4115       int c, c1;
4116       struct charset *charset;
4117
4118       src_base = src;
4119       consumed_chars_base = consumed_chars;
4120
4121       if (charbuf >= charbuf_end)
4122         break;
4123
4124       ONE_MORE_BYTE (c);
4125
4126       if (c < 0)
4127         goto invalid_code;
4128       if (c < 0x80)
4129         charset = charset_roman;
4130       else
4131         {
4132           /* BIG5 -> Big5 */
4133           if (c < 0xA1 || c > 0xFE)
4134             goto invalid_code;
4135           ONE_MORE_BYTE (c1);
4136           if (c1 < 0x40 || (c1 > 0x7E && c1 < 0xA1) || c1 > 0xFE)
4137             goto invalid_code;
4138           c = c << 8 | c1;
4139           charset = charset_big5;
4140         }
4141       if (charset->id != charset_ascii
4142           && last_id != charset->id)
4143         {
4144           if (last_id != charset_ascii)
4145             ADD_CHARSET_DATA (charbuf, last_offset, char_offset, last_id);
4146           last_id = charset->id;
4147           last_offset = char_offset;
4148         }
4149       CODING_DECODE_CHAR (coding, src, src_base, src_end, charset, c, c);
4150       *charbuf++ = c;
4151       char_offset++;
4152       continue;
4153
4154     invalid_code:
4155       src = src_base;
4156       consumed_chars = consumed_chars_base;
4157       ONE_MORE_BYTE (c);
4158       *charbuf++ = c < 0 ? -c : BYTE8_TO_CHAR (c);
4159       char_offset++;
4160       coding->errors++;
4161     }
4162
4163  no_more_source:
4164   if (last_id != charset_ascii)
4165     ADD_CHARSET_DATA (charbuf, last_offset, char_offset, last_id);
4166   coding->consumed_char += consumed_chars_base;
4167   coding->consumed = src_base - coding->source;
4168   coding->charbuf_used = charbuf - coding->charbuf;
4169 }
4170
4171 /* See the above "GENERAL NOTES on `encode_coding_XXX ()' functions".
4172    This function can encode charsets `ascii', `katakana-jisx0201',
4173    `japanese-jisx0208', `chinese-big5-1', and `chinese-big5-2'.  We
4174    are sure that all these charsets are registered as official charset
4175    (i.e. do not have extended leading-codes).  Characters of other
4176    charsets are produced without any encoding.  If SJIS_P is 1, encode
4177    SJIS text, else encode BIG5 text.  */
4178
4179 static int
4180 encode_coding_sjis (coding)
4181      struct coding_system *coding;
4182 {
4183   int multibytep = coding->dst_multibyte;
4184   int *charbuf = coding->charbuf;
4185   int *charbuf_end = charbuf + coding->charbuf_used;
4186   unsigned char *dst = coding->destination + coding->produced;
4187   unsigned char *dst_end = coding->destination + coding->dst_bytes;
4188   int safe_room = 4;
4189   int produced_chars = 0;
4190   Lisp_Object attrs, charset_list, val;
4191   int ascii_compatible;
4192   struct charset *charset_roman, *charset_kanji, *charset_kana;
4193   struct charset *charset_kanji2;
4194   int c;
4195
4196   CODING_GET_INFO (coding, attrs, charset_list);
4197   val = charset_list;
4198   charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
4199   charset_kana = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
4200   charset_kanji = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
4201   charset_kanji2 = NILP (val) ? NULL : CHARSET_FROM_ID (XINT (XCAR (val)));
4202
4203   ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
4204
4205   while (charbuf < charbuf_end)
4206     {
4207       ASSURE_DESTINATION (safe_room);
4208       c = *charbuf++;
4209       /* Now encode the character C.  */
4210       if (ASCII_CHAR_P (c) && ascii_compatible)
4211         EMIT_ONE_ASCII_BYTE (c);
4212       else if (CHAR_BYTE8_P (c))
4213         {
4214           c = CHAR_TO_BYTE8 (c);
4215           EMIT_ONE_BYTE (c);
4216         }
4217       else
4218         {
4219           unsigned code;
4220           struct charset *charset = char_charset (c, charset_list, &code);
4221
4222           if (!charset)
4223             {
4224               if (coding->mode & CODING_MODE_SAFE_ENCODING)
4225                 {
4226                   code = CODING_INHIBIT_CHARACTER_SUBSTITUTION;
4227                   charset = CHARSET_FROM_ID (charset_ascii);
4228                 }
4229               else
4230                 {
4231                   c = coding->default_char;
4232                   charset = char_charset (c, charset_list, &code);
4233                 }
4234             }
4235           if (code == CHARSET_INVALID_CODE (charset))
4236             abort ();
4237           if (charset == charset_kanji)
4238             {
4239               int c1, c2;
4240               JIS_TO_SJIS (code);
4241               c1 = code >> 8, c2 = code & 0xFF;
4242               EMIT_TWO_BYTES (c1, c2);
4243             }
4244           else if (charset == charset_kana)
4245             EMIT_ONE_BYTE (code | 0x80);
4246           else if (charset_kanji2 && charset == charset_kanji2)
4247             {
4248               int c1, c2;
4249
4250               c1 = code >> 8;
4251               if (c1 == 0x21 || (c1 >= 0x23 && c1 < 0x25)
4252                   || (c1 >= 0x2C && c1 <= 0x2F) || c1 >= 0x6E)
4253                 {
4254                   JIS_TO_SJIS2 (code);
4255                   c1 = code >> 8, c2 = code & 0xFF;
4256                   EMIT_TWO_BYTES (c1, c2);
4257                 }
4258               else
4259                 EMIT_ONE_ASCII_BYTE (code & 0x7F);
4260             }
4261           else
4262             EMIT_ONE_ASCII_BYTE (code & 0x7F);
4263         }
4264     }
4265   record_conversion_result (coding, CODING_RESULT_SUCCESS);
4266   coding->produced_char += produced_chars;
4267   coding->produced = dst - coding->destination;
4268   return 0;
4269 }
4270
4271 static int
4272 encode_coding_big5 (coding)
4273      struct coding_system *coding;
4274 {
4275   int multibytep = coding->dst_multibyte;
4276   int *charbuf = coding->charbuf;
4277   int *charbuf_end = charbuf + coding->charbuf_used;
4278   unsigned char *dst = coding->destination + coding->produced;
4279   unsigned char *dst_end = coding->destination + coding->dst_bytes;
4280   int safe_room = 4;
4281   int produced_chars = 0;
4282   Lisp_Object attrs, charset_list, val;
4283   int ascii_compatible;
4284   struct charset *charset_roman, *charset_big5;
4285   int c;
4286
4287   CODING_GET_INFO (coding, attrs, charset_list);
4288   val = charset_list;
4289   charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
4290   charset_big5 = CHARSET_FROM_ID (XINT (XCAR (val)));
4291   ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
4292
4293   while (charbuf < charbuf_end)
4294     {
4295       ASSURE_DESTINATION (safe_room);
4296       c = *charbuf++;
4297       /* Now encode the character C.  */
4298       if (ASCII_CHAR_P (c) && ascii_compatible)
4299         EMIT_ONE_ASCII_BYTE (c);
4300       else if (CHAR_BYTE8_P (c))
4301         {
4302           c = CHAR_TO_BYTE8 (c);
4303           EMIT_ONE_BYTE (c);
4304         }
4305       else
4306         {
4307           unsigned code;
4308           struct charset *charset = char_charset (c, charset_list, &code);
4309
4310           if (! charset)
4311             {
4312               if (coding->mode & CODING_MODE_SAFE_ENCODING)
4313                 {
4314                   code = CODING_INHIBIT_CHARACTER_SUBSTITUTION;
4315                   charset = CHARSET_FROM_ID (charset_ascii);
4316                 }
4317               else
4318                 {
4319                   c = coding->default_char;
4320                   charset = char_charset (c, charset_list, &code);
4321                 }
4322             }
4323           if (code == CHARSET_INVALID_CODE (charset))
4324             abort ();
4325           if (charset == charset_big5)
4326             {
4327               int c1, c2;
4328
4329               c1 = code >> 8, c2 = code & 0xFF;
4330               EMIT_TWO_BYTES (c1, c2);
4331             }
4332           else
4333             EMIT_ONE_ASCII_BYTE (code & 0x7F);
4334         }
4335     }
4336   record_conversion_result (coding, CODING_RESULT_SUCCESS);
4337   coding->produced_char += produced_chars;
4338   coding->produced = dst - coding->destination;
4339   return 0;
4340 }
4341
4342 \f
4343 /*** 10. CCL handlers ***/
4344
4345 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
4346    Check if a text is encoded in a coding system of which
4347    encoder/decoder are written in CCL program.  If it is, return
4348    CATEGORY_MASK_CCL, else return 0.  */
4349
4350 static int
4351 detect_coding_ccl (coding, detect_info)
4352      struct coding_system *coding;
4353      struct coding_detection_info *detect_info;
4354 {
4355   const unsigned char *src = coding->source, *src_base;
4356   const unsigned char *src_end = coding->source + coding->src_bytes;
4357   int multibytep = coding->src_multibyte;
4358   int consumed_chars = 0;
4359   int found = 0;
4360   unsigned char *valids = CODING_CCL_VALIDS (coding);
4361   int head_ascii = coding->head_ascii;
4362   Lisp_Object attrs;
4363
4364   detect_info->checked |= CATEGORY_MASK_CCL;
4365
4366   coding = &coding_categories[coding_category_ccl];
4367   attrs = CODING_ID_ATTRS (coding->id);
4368   if (! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
4369     src += head_ascii;
4370
4371   while (1)
4372     {
4373       int c;
4374
4375       src_base = src;
4376       ONE_MORE_BYTE (c);
4377       if (c < 0 || ! valids[c])
4378         break;
4379       if ((valids[c] > 1))
4380         found = CATEGORY_MASK_CCL;
4381     }
4382   detect_info->rejected |= CATEGORY_MASK_CCL;
4383   return 0;
4384
4385  no_more_source:
4386   detect_info->found |= found;
4387   return 1;
4388 }
4389
4390 static void
4391 decode_coding_ccl (coding)
4392      struct coding_system *coding;
4393 {
4394   const unsigned char *src = coding->source + coding->consumed;
4395   const unsigned char *src_end = coding->source + coding->src_bytes;
4396   int *charbuf = coding->charbuf;
4397   int *charbuf_end = charbuf + coding->charbuf_size;
4398   int consumed_chars = 0;
4399   int multibytep = coding->src_multibyte;
4400   struct ccl_program ccl;
4401   int source_charbuf[1024];
4402   int source_byteidx[1024];
4403   Lisp_Object attrs, charset_list;
4404
4405   CODING_GET_INFO (coding, attrs, charset_list);
4406   setup_ccl_program (&ccl, CODING_CCL_DECODER (coding));
4407
4408   while (src < src_end)
4409     {
4410       const unsigned char *p = src;
4411       int *source, *source_end;
4412       int i = 0;
4413
4414       if (multibytep)
4415         while (i < 1024 && p < src_end)
4416           {
4417             source_byteidx[i] = p - src;
4418             source_charbuf[i++] = STRING_CHAR_ADVANCE (p);
4419           }
4420       else
4421         while (i < 1024 && p < src_end)
4422           source_charbuf[i++] = *p++;
4423
4424       if (p == src_end && coding->mode & CODING_MODE_LAST_BLOCK)
4425         ccl.last_block = 1;
4426
4427       source = source_charbuf;
4428       source_end = source + i;
4429       while (source < source_end)
4430         {
4431           ccl_driver (&ccl, source, charbuf,
4432                       source_end - source, charbuf_end - charbuf,
4433                       charset_list);
4434           source += ccl.consumed;
4435           charbuf += ccl.produced;
4436           if (ccl.status != CCL_STAT_SUSPEND_BY_DST)
4437             break;
4438         }
4439       if (source < source_end)
4440         src += source_byteidx[source - source_charbuf];
4441       else
4442         src = p;
4443       consumed_chars += source - source_charbuf;
4444
4445       if (ccl.status != CCL_STAT_SUSPEND_BY_SRC
4446           && ccl.status != CODING_RESULT_INSUFFICIENT_SRC)
4447         break;
4448     }
4449
4450   switch (ccl.status)
4451     {
4452     case CCL_STAT_SUSPEND_BY_SRC:
4453       record_conversion_result (coding, CODING_RESULT_INSUFFICIENT_SRC);
4454       break;
4455     case CCL_STAT_SUSPEND_BY_DST:
4456       break;
4457     case CCL_STAT_QUIT:
4458     case CCL_STAT_INVALID_CMD:
4459       record_conversion_result (coding, CODING_RESULT_INTERRUPT);
4460       break;
4461     default:
4462       record_conversion_result (coding, CODING_RESULT_SUCCESS);
4463       break;
4464     }
4465   coding->consumed_char += consumed_chars;
4466   coding->consumed = src - coding->source;
4467   coding->charbuf_used = charbuf - coding->charbuf;
4468 }
4469
4470 static int
4471 encode_coding_ccl (coding)
4472      struct coding_system *coding;
4473 {
4474   struct ccl_program ccl;
4475   int multibytep = coding->dst_multibyte;
4476   int *charbuf = coding->charbuf;
4477   int *charbuf_end = charbuf + coding->charbuf_used;
4478   unsigned char *dst = coding->destination + coding->produced;
4479   unsigned char *dst_end = coding->destination + coding->dst_bytes;
4480   unsigned char *adjusted_dst_end = dst_end - 1;
4481   int destination_charbuf[1024];
4482   int i, produced_chars = 0;
4483   Lisp_Object attrs, charset_list;
4484
4485   CODING_GET_INFO (coding, attrs, charset_list);
4486   setup_ccl_program (&ccl, CODING_CCL_ENCODER (coding));
4487
4488   ccl.last_block = coding->mode & CODING_MODE_LAST_BLOCK;
4489   ccl.dst_multibyte = coding->dst_multibyte;
4490
4491   while (charbuf < charbuf_end && dst < adjusted_dst_end)
4492     {
4493       int dst_bytes = dst_end - dst;
4494       if (dst_bytes > 1024)
4495         dst_bytes = 1024;
4496
4497       ccl_driver (&ccl, charbuf, destination_charbuf,
4498                   charbuf_end - charbuf, dst_bytes, charset_list);
4499       charbuf += ccl.consumed;
4500       if (multibytep)
4501         for (i = 0; i < ccl.produced; i++)
4502           EMIT_ONE_BYTE (destination_charbuf[i] & 0xFF);
4503       else
4504         {
4505           for (i = 0; i < ccl.produced; i++)
4506             *dst++ = destination_charbuf[i] & 0xFF;
4507           produced_chars += ccl.produced;
4508         }
4509     }
4510
4511   switch (ccl.status)
4512     {
4513     case CCL_STAT_SUSPEND_BY_SRC:
4514       record_conversion_result (coding, CODING_RESULT_INSUFFICIENT_SRC);
4515       break;
4516     case CCL_STAT_SUSPEND_BY_DST:
4517       record_conversion_result (coding, CODING_RESULT_INSUFFICIENT_DST);
4518       break;
4519     case CCL_STAT_QUIT:
4520     case CCL_STAT_INVALID_CMD:
4521       record_conversion_result (coding, CODING_RESULT_INTERRUPT);
4522       break;
4523     default:
4524       record_conversion_result (coding, CODING_RESULT_SUCCESS);
4525       break;
4526     }
4527
4528   coding->produced_char += produced_chars;
4529   coding->produced = dst - coding->destination;
4530   return 0;
4531 }
4532
4533
4534 \f
4535 /*** 10, 11. no-conversion handlers ***/
4536
4537 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".  */
4538
4539 static void
4540 decode_coding_raw_text (coding)
4541      struct coding_system *coding;
4542 {
4543   coding->chars_at_source = 1;
4544   coding->consumed_char = 0;
4545   coding->consumed = 0;
4546   record_conversion_result (coding, CODING_RESULT_SUCCESS);
4547 }
4548
4549 static int
4550 encode_coding_raw_text (coding)
4551      struct coding_system *coding;
4552 {
4553   int multibytep = coding->dst_multibyte;
4554   int *charbuf = coding->charbuf;
4555   int *charbuf_end = coding->charbuf + coding->charbuf_used;
4556   unsigned char *dst = coding->destination + coding->produced;
4557   unsigned char *dst_end = coding->destination + coding->dst_bytes;
4558   int produced_chars = 0;
4559   int c;
4560
4561   if (multibytep)
4562     {
4563       int safe_room = MAX_MULTIBYTE_LENGTH * 2;
4564
4565       if (coding->src_multibyte)
4566         while (charbuf < charbuf_end)
4567           {
4568             ASSURE_DESTINATION (safe_room);
4569             c = *charbuf++;
4570             if (ASCII_CHAR_P (c))
4571               EMIT_ONE_ASCII_BYTE (c);
4572             else if (CHAR_BYTE8_P (c))
4573               {
4574                 c = CHAR_TO_BYTE8 (c);
4575                 EMIT_ONE_BYTE (c);
4576               }
4577             else
4578               {
4579                 unsigned char str[MAX_MULTIBYTE_LENGTH], *p0 = str, *p1 = str;
4580
4581                 CHAR_STRING_ADVANCE (c, p1);
4582                 while (p0 < p1)
4583                   {
4584                     EMIT_ONE_BYTE (*p0);
4585                     p0++;
4586                   }
4587               }
4588           }
4589       else
4590         while (charbuf < charbuf_end)
4591           {
4592             ASSURE_DESTINATION (safe_room);
4593             c = *charbuf++;
4594             EMIT_ONE_BYTE (c);
4595           }
4596     }
4597   else
4598     {
4599       if (coding->src_multibyte)
4600         {
4601           int safe_room = MAX_MULTIBYTE_LENGTH;
4602
4603           while (charbuf < charbuf_end)
4604             {
4605               ASSURE_DESTINATION (safe_room);
4606               c = *charbuf++;
4607               if (ASCII_CHAR_P (c))
4608                 *dst++ = c;
4609               else if (CHAR_BYTE8_P (c))
4610                 *dst++ = CHAR_TO_BYTE8 (c);
4611               else
4612                 CHAR_STRING_ADVANCE (c, dst);
4613               produced_chars++;
4614             }
4615         }
4616       else
4617         {
4618           ASSURE_DESTINATION (charbuf_end - charbuf);
4619           while (charbuf < charbuf_end && dst < dst_end)
4620             *dst++ = *charbuf++;
4621           produced_chars = dst - (coding->destination + coding->dst_bytes);
4622         }
4623     }
4624   record_conversion_result (coding, CODING_RESULT_SUCCESS);
4625   coding->produced_char += produced_chars;
4626   coding->produced = dst - coding->destination;
4627   return 0;
4628 }
4629
4630 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
4631    Check if a text is encoded in a charset-based coding system.  If it
4632    is, return 1, else return 0.  */
4633
4634 static int
4635 detect_coding_charset (coding, detect_info)
4636      struct coding_system *coding;
4637      struct coding_detection_info *detect_info;
4638 {
4639   const unsigned char *src = coding->source, *src_base;
4640   const unsigned char *src_end = coding->source + coding->src_bytes;
4641   int multibytep = coding->src_multibyte;
4642   int consumed_chars = 0;
4643   Lisp_Object attrs, valids;
4644   int found = 0;
4645
4646   detect_info->checked |= CATEGORY_MASK_CHARSET;
4647
4648   coding = &coding_categories[coding_category_charset];
4649   attrs = CODING_ID_ATTRS (coding->id);
4650   valids = AREF (attrs, coding_attr_charset_valids);
4651
4652   if (! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
4653     src += coding->head_ascii;
4654
4655   while (1)
4656     {
4657       int c;
4658
4659       src_base = src;
4660       ONE_MORE_BYTE (c);
4661       if (c < 0)
4662         continue;
4663       if (NILP (AREF (valids, c)))
4664         break;
4665       if (c >= 0x80)
4666         found = CATEGORY_MASK_CHARSET;
4667     }
4668   detect_info->rejected |= CATEGORY_MASK_CHARSET;
4669   return 0;
4670
4671  no_more_source:
4672   detect_info->found |= found;
4673   return 1;
4674 }
4675
4676 static void
4677 decode_coding_charset (coding)
4678      struct coding_system *coding;
4679 {
4680   const unsigned char *src = coding->source + coding->consumed;
4681   const unsigned char *src_end = coding->source + coding->src_bytes;
4682   const unsigned char *src_base;
4683   int *charbuf = coding->charbuf;
4684   int *charbuf_end = charbuf + coding->charbuf_size - MAX_ANNOTATION_LENGTH;
4685   int consumed_chars = 0, consumed_chars_base;
4686   int multibytep = coding->src_multibyte;
4687   Lisp_Object attrs, charset_list, valids;
4688   int char_offset = coding->produced_char;
4689   int last_offset = char_offset;
4690   int last_id = charset_ascii;
4691
4692   CODING_GET_INFO (coding, attrs, charset_list);
4693   valids = AREF (attrs, coding_attr_charset_valids);
4694
4695   while (1)
4696     {
4697       int c;
4698       Lisp_Object val;
4699       struct charset *charset;
4700       int dim;
4701       int len = 1;
4702       unsigned code;
4703
4704       src_base = src;
4705       consumed_chars_base = consumed_chars;
4706
4707       if (charbuf >= charbuf_end)
4708         break;
4709
4710       ONE_MORE_BYTE (c);
4711       if (c < 0)
4712         goto invalid_code;
4713       code = c;
4714
4715       val = AREF (valids, c);
4716       if (NILP (val))
4717         goto invalid_code;
4718       if (INTEGERP (val))
4719         {
4720           charset = CHARSET_FROM_ID (XFASTINT (val));
4721           dim = CHARSET_DIMENSION (charset);
4722           while (len < dim)
4723             {
4724               ONE_MORE_BYTE (c);
4725               code = (code << 8) | c;
4726               len++;
4727             }
4728           CODING_DECODE_CHAR (coding, src, src_base, src_end,
4729                               charset, code, c);
4730         }
4731       else
4732         {
4733           /* VAL is a list of charset IDs.  It is assured that the
4734              list is sorted by charset dimensions (smaller one
4735              comes first).  */
4736           while (CONSP (val))
4737             {
4738               charset = CHARSET_FROM_ID (XFASTINT (XCAR (val)));
4739               dim = CHARSET_DIMENSION (charset);
4740               while (len < dim)
4741                 {
4742                   ONE_MORE_BYTE (c);
4743                   code = (code << 8) | c;
4744                   len++;
4745                 }
4746               CODING_DECODE_CHAR (coding, src, src_base,
4747                                   src_end, charset, code, c);
4748               if (c >= 0)
4749                 break;
4750               val = XCDR (val);
4751             }
4752         }
4753       if (c < 0)
4754         goto invalid_code;
4755       if (charset->id != charset_ascii
4756           && last_id != charset->id)
4757         {
4758           if (last_id != charset_ascii)
4759             ADD_CHARSET_DATA (charbuf, last_offset, char_offset, last_id);
4760           last_id = charset->id;
4761           last_offset = char_offset;
4762         }
4763
4764       *charbuf++ = c;
4765       char_offset++;
4766       continue;
4767
4768     invalid_code:
4769       src = src_base;
4770       consumed_chars = consumed_chars_base;
4771       ONE_MORE_BYTE (c);
4772       *charbuf++ = c < 0 ? -c : ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
4773       char_offset++;
4774       coding->errors++;
4775     }
4776
4777  no_more_source:
4778   if (last_id != charset_ascii)
4779     ADD_CHARSET_DATA (charbuf, last_offset, char_offset, last_id);
4780   coding->consumed_char += consumed_chars_base;
4781   coding->consumed = src_base - coding->source;
4782   coding->charbuf_used = charbuf - coding->charbuf;
4783 }
4784
4785 static int
4786 encode_coding_charset (coding)
4787      struct coding_system *coding;
4788 {
4789   int multibytep = coding->dst_multibyte;
4790   int *charbuf = coding->charbuf;
4791   int *charbuf_end = charbuf + coding->charbuf_used;
4792   unsigned char *dst = coding->destination + coding->produced;
4793   unsigned char *dst_end = coding->destination + coding->dst_bytes;
4794   int safe_room = MAX_MULTIBYTE_LENGTH;
4795   int produced_chars = 0;
4796   Lisp_Object attrs, charset_list;
4797   int ascii_compatible;
4798   int c;
4799
4800   CODING_GET_INFO (coding, attrs, charset_list);
4801   ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
4802
4803   while (charbuf < charbuf_end)
4804     {
4805       struct charset *charset;
4806       unsigned code;
4807
4808       ASSURE_DESTINATION (safe_room);
4809       c = *charbuf++;
4810       if (ascii_compatible && ASCII_CHAR_P (c))
4811         EMIT_ONE_ASCII_BYTE (c);
4812       else if (CHAR_BYTE8_P (c))
4813         {
4814           c = CHAR_TO_BYTE8 (c);
4815           EMIT_ONE_BYTE (c);
4816         }
4817       else
4818         {
4819           charset = char_charset (c, charset_list, &code);
4820           if (charset)
4821             {
4822               if (CHARSET_DIMENSION (charset) == 1)
4823                 EMIT_ONE_BYTE (code);
4824               else if (CHARSET_DIMENSION (charset) == 2)
4825                 EMIT_TWO_BYTES (code >> 8, code & 0xFF);
4826               else if (CHARSET_DIMENSION (charset) == 3)
4827                 EMIT_THREE_BYTES (code >> 16, (code >> 8) & 0xFF, code & 0xFF);
4828               else
4829                 EMIT_FOUR_BYTES (code >> 24, (code >> 16) & 0xFF,
4830                                  (code >> 8) & 0xFF, code & 0xFF);
4831             }
4832           else
4833             {
4834               if (coding->mode & CODING_MODE_SAFE_ENCODING)
4835                 c = CODING_INHIBIT_CHARACTER_SUBSTITUTION;
4836               else
4837                 c = coding->default_char;
4838               EMIT_ONE_BYTE (c);
4839             }
4840         }
4841     }
4842
4843   record_conversion_result (coding, CODING_RESULT_SUCCESS);
4844   coding->produced_char += produced_chars;
4845   coding->produced = dst - coding->destination;
4846   return 0;
4847 }
4848
4849 \f
4850 /*** 7. C library functions ***/
4851
4852 /* Setup coding context CODING from information about CODING_SYSTEM.
4853    If CODING_SYSTEM is nil, `no-conversion' is assumed.  If
4854    CODING_SYSTEM is invalid, signal an error.  */
4855
4856 void
4857 setup_coding_system (coding_system, coding)
4858      Lisp_Object coding_system;
4859      struct coding_system *coding;
4860 {
4861   Lisp_Object attrs;
4862   Lisp_Object eol_type;
4863   Lisp_Object coding_type;
4864   Lisp_Object val;
4865
4866   if (NILP (coding_system))
4867     coding_system = Qno_conversion;
4868
4869   CHECK_CODING_SYSTEM_GET_ID (coding_system, coding->id);
4870
4871   attrs = CODING_ID_ATTRS (coding->id);
4872   eol_type = CODING_ID_EOL_TYPE (coding->id);
4873
4874   coding->mode = 0;
4875   coding->head_ascii = -1;
4876   coding->common_flags
4877     = (VECTORP (eol_type) ? CODING_REQUIRE_DETECTION_MASK : 0);
4878   if (! NILP (CODING_ATTR_POST_READ (attrs)))
4879     coding->common_flags |= CODING_REQUIRE_DECODING_MASK;
4880   if (! NILP (CODING_ATTR_PRE_WRITE (attrs)))
4881     coding->common_flags |= CODING_REQUIRE_ENCODING_MASK;
4882   if (! NILP (CODING_ATTR_FOR_UNIBYTE (attrs)))
4883     coding->common_flags |= CODING_FOR_UNIBYTE_MASK;
4884
4885   val = CODING_ATTR_SAFE_CHARSETS (attrs);
4886   coding->max_charset_id = SCHARS (val) - 1;
4887   coding->safe_charsets = (char *) SDATA (val);
4888   coding->default_char = XINT (CODING_ATTR_DEFAULT_CHAR (attrs));
4889
4890   coding_type = CODING_ATTR_TYPE (attrs);
4891   if (EQ (coding_type, Qundecided))
4892     {
4893       coding->detector = NULL;
4894       coding->decoder = decode_coding_raw_text;
4895       coding->encoder = encode_coding_raw_text;
4896       coding->common_flags |= CODING_REQUIRE_DETECTION_MASK;
4897     }
4898   else if (EQ (coding_type, Qiso_2022))
4899     {
4900       int i;
4901       int flags = XINT (AREF (attrs, coding_attr_iso_flags));
4902
4903       /* Invoke graphic register 0 to plane 0.  */
4904       CODING_ISO_INVOCATION (coding, 0) = 0;
4905       /* Invoke graphic register 1 to plane 1 if we can use 8-bit.  */
4906       CODING_ISO_INVOCATION (coding, 1)
4907         = (flags & CODING_ISO_FLAG_SEVEN_BITS ? -1 : 1);
4908       /* Setup the initial status of designation.  */
4909       for (i = 0; i < 4; i++)
4910         CODING_ISO_DESIGNATION (coding, i) = CODING_ISO_INITIAL (coding, i);
4911       /* Not single shifting initially.  */
4912       CODING_ISO_SINGLE_SHIFTING (coding) = 0;
4913       /* Beginning of buffer should also be regarded as bol. */
4914       CODING_ISO_BOL (coding) = 1;
4915       coding->detector = detect_coding_iso_2022;
4916       coding->decoder = decode_coding_iso_2022;
4917       coding->encoder = encode_coding_iso_2022;
4918       if (flags & CODING_ISO_FLAG_SAFE)
4919         coding->mode |= CODING_MODE_SAFE_ENCODING;
4920       coding->common_flags
4921         |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK
4922             | CODING_REQUIRE_FLUSHING_MASK);
4923       if (flags & CODING_ISO_FLAG_COMPOSITION)
4924         coding->common_flags |= CODING_ANNOTATE_COMPOSITION_MASK;
4925       if (flags & CODING_ISO_FLAG_DESIGNATION)
4926         coding->common_flags |= CODING_ANNOTATE_CHARSET_MASK;
4927       if (flags & CODING_ISO_FLAG_FULL_SUPPORT)
4928         {
4929           setup_iso_safe_charsets (attrs);
4930           val = CODING_ATTR_SAFE_CHARSETS (attrs);
4931           coding->max_charset_id = SCHARS (val) - 1;
4932           coding->safe_charsets = (char *) SDATA (val);
4933         }
4934       CODING_ISO_FLAGS (coding) = flags;
4935     }
4936   else if (EQ (coding_type, Qcharset))
4937     {
4938       coding->detector = detect_coding_charset;
4939       coding->decoder = decode_coding_charset;
4940       coding->encoder = encode_coding_charset;
4941       coding->common_flags
4942         |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
4943     }
4944   else if (EQ (coding_type, Qutf_8))
4945     {
4946       coding->detector = detect_coding_utf_8;
4947       coding->decoder = decode_coding_utf_8;
4948       coding->encoder = encode_coding_utf_8;
4949       coding->common_flags
4950         |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
4951     }
4952   else if (EQ (coding_type, Qutf_16))
4953     {
4954       val = AREF (attrs, coding_attr_utf_16_bom);
4955       CODING_UTF_16_BOM (coding) = (CONSP (val) ? utf_16_detect_bom
4956                                     : EQ (val, Qt) ? utf_16_with_bom
4957                                     : utf_16_without_bom);
4958       val = AREF (attrs, coding_attr_utf_16_endian);
4959       CODING_UTF_16_ENDIAN (coding) = (EQ (val, Qbig) ? utf_16_big_endian
4960                                        : utf_16_little_endian);
4961       CODING_UTF_16_SURROGATE (coding) = 0;
4962       coding->detector = detect_coding_utf_16;
4963       coding->decoder = decode_coding_utf_16;
4964       coding->encoder = encode_coding_utf_16;
4965       coding->common_flags
4966         |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
4967       if (CODING_UTF_16_BOM (coding) == utf_16_detect_bom)
4968         coding->common_flags |= CODING_REQUIRE_DETECTION_MASK;
4969     }
4970   else if (EQ (coding_type, Qccl))
4971     {
4972       coding->detector = detect_coding_ccl;
4973       coding->decoder = decode_coding_ccl;
4974       coding->encoder = encode_coding_ccl;
4975       coding->common_flags
4976         |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK
4977             | CODING_REQUIRE_FLUSHING_MASK);
4978     }
4979   else if (EQ (coding_type, Qemacs_mule))
4980     {
4981       coding->detector = detect_coding_emacs_mule;
4982       coding->decoder = decode_coding_emacs_mule;
4983       coding->encoder = encode_coding_emacs_mule;
4984       coding->common_flags
4985         |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
4986       if (! NILP (AREF (attrs, coding_attr_emacs_mule_full))
4987           && ! EQ (CODING_ATTR_CHARSET_LIST (attrs), Vemacs_mule_charset_list))
4988         {
4989           Lisp_Object tail, safe_charsets;
4990           int max_charset_id = 0;
4991
4992           for (tail = Vemacs_mule_charset_list; CONSP (tail);
4993                tail = XCDR (tail))
4994             if (max_charset_id < XFASTINT (XCAR (tail)))
4995               max_charset_id = XFASTINT (XCAR (tail));
4996           safe_charsets = Fmake_string (make_number (max_charset_id + 1),
4997                                         make_number (255));
4998           for (tail = Vemacs_mule_charset_list; CONSP (tail);
4999                tail = XCDR (tail))
5000             SSET (safe_charsets, XFASTINT (XCAR (tail)), 0);
5001           coding->max_charset_id = max_charset_id;
5002           coding->safe_charsets = (char *) SDATA (safe_charsets);
5003         }
5004     }
5005   else if (EQ (coding_type, Qshift_jis))
5006     {
5007       coding->detector = detect_coding_sjis;
5008       coding->decoder = decode_coding_sjis;
5009       coding->encoder = encode_coding_sjis;
5010       coding->common_flags
5011         |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
5012     }
5013   else if (EQ (coding_type, Qbig5))
5014     {
5015       coding->detector = detect_coding_big5;
5016       coding->decoder = decode_coding_big5;
5017       coding->encoder = encode_coding_big5;
5018       coding->common_flags
5019         |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
5020     }
5021   else                          /* EQ (coding_type, Qraw_text) */
5022     {
5023       coding->detector = NULL;
5024       coding->decoder = decode_coding_raw_text;
5025       coding->encoder = encode_coding_raw_text;
5026     }
5027
5028   return;
5029 }
5030
5031 /* Return raw-text or one of its subsidiaries that has the same
5032    eol_type as CODING-SYSTEM.  */
5033
5034 Lisp_Object
5035 raw_text_coding_system (coding_system)
5036      Lisp_Object coding_system;
5037 {
5038   Lisp_Object spec, attrs;
5039   Lisp_Object eol_type, raw_text_eol_type;
5040
5041   if (NILP (coding_system))
5042     return Qraw_text;
5043   spec = CODING_SYSTEM_SPEC (coding_system);
5044   attrs = AREF (spec, 0);
5045
5046   if (EQ (CODING_ATTR_TYPE (attrs), Qraw_text))
5047     return coding_system;
5048
5049   eol_type = AREF (spec, 2);
5050   if (VECTORP (eol_type))
5051     return Qraw_text;
5052   spec = CODING_SYSTEM_SPEC (Qraw_text);
5053   raw_text_eol_type = AREF (spec, 2);
5054   return (EQ (eol_type, Qunix) ? AREF (raw_text_eol_type, 0)
5055           : EQ (eol_type, Qdos) ? AREF (raw_text_eol_type, 1)
5056           : AREF (raw_text_eol_type, 2));
5057 }
5058
5059
5060 /* If CODING_SYSTEM doesn't specify end-of-line format but PARENT
5061    does, return one of the subsidiary that has the same eol-spec as
5062    PARENT.  Otherwise, return CODING_SYSTEM.  */
5063
5064 Lisp_Object
5065 coding_inherit_eol_type (coding_system, parent)
5066      Lisp_Object coding_system, parent;
5067 {
5068   Lisp_Object spec, eol_type;
5069
5070   if (NILP (coding_system))
5071     coding_system = Qraw_text;
5072   spec = CODING_SYSTEM_SPEC (coding_system);
5073   eol_type = AREF (spec, 2);
5074   if (VECTORP (eol_type)
5075       && ! NILP (parent))
5076     {
5077       Lisp_Object parent_spec;
5078       Lisp_Object parent_eol_type;
5079
5080       parent_spec
5081         = CODING_SYSTEM_SPEC (buffer_defaults.buffer_file_coding_system);
5082       parent_eol_type = AREF (parent_spec, 2);
5083       if (EQ (parent_eol_type, Qunix))
5084         coding_system = AREF (eol_type, 0);
5085       else if (EQ (parent_eol_type, Qdos))
5086         coding_system = AREF (eol_type, 1);
5087       else if (EQ (parent_eol_type, Qmac))
5088         coding_system = AREF (eol_type, 2);
5089     }
5090   return coding_system;
5091 }
5092
5093 /* Emacs has a mechanism to automatically detect a coding system if it
5094    is one of Emacs' internal format, ISO2022, SJIS, and BIG5.  But,
5095    it's impossible to distinguish some coding systems accurately
5096    because they use the same range of codes.  So, at first, coding
5097    systems are categorized into 7, those are:
5098
5099    o coding-category-emacs-mule
5100
5101         The category for a coding system which has the same code range
5102         as Emacs' internal format.  Assigned the coding-system (Lisp
5103         symbol) `emacs-mule' by default.
5104
5105    o coding-category-sjis
5106
5107         The category for a coding system which has the same code range
5108         as SJIS.  Assigned the coding-system (Lisp
5109         symbol) `japanese-shift-jis' by default.
5110
5111    o coding-category-iso-7
5112
5113         The category for a coding system which has the same code range
5114         as ISO2022 of 7-bit environment.  This doesn't use any locking
5115         shift and single shift functions.  This can encode/decode all
5116         charsets.  Assigned the coding-system (Lisp symbol)
5117         `iso-2022-7bit' by default.
5118
5119    o coding-category-iso-7-tight
5120
5121         Same as coding-category-iso-7 except that this can
5122         encode/decode only the specified charsets.
5123
5124    o coding-category-iso-8-1
5125
5126         The category for a coding system which has the same code range
5127         as ISO2022 of 8-bit environment and graphic plane 1 used only
5128         for DIMENSION1 charset.  This doesn't use any locking shift
5129         and single shift functions.  Assigned the coding-system (Lisp
5130         symbol) `iso-latin-1' by default.
5131
5132    o coding-category-iso-8-2
5133
5134         The category for a coding system which has the same code range
5135         as ISO2022 of 8-bit environment and graphic plane 1 used only
5136         for DIMENSION2 charset.  This doesn't use any locking shift
5137         and single shift functions.  Assigned the coding-system (Lisp
5138         symbol) `japanese-iso-8bit' by default.
5139
5140    o coding-category-iso-7-else
5141
5142         The category for a coding system which has the same code range
5143         as ISO2022 of 7-bit environemnt but uses locking shift or
5144         single shift functions.  Assigned the coding-system (Lisp
5145         symbol) `iso-2022-7bit-lock' by default.
5146
5147    o coding-category-iso-8-else
5148
5149         The category for a coding system which has the same code range
5150         as ISO2022 of 8-bit environemnt but uses locking shift or
5151         single shift functions.  Assigned the coding-system (Lisp
5152         symbol) `iso-2022-8bit-ss2' by default.
5153
5154    o coding-category-big5
5155
5156         The category for a coding system which has the same code range
5157         as BIG5.  Assigned the coding-system (Lisp symbol)
5158         `cn-big5' by default.
5159
5160    o coding-category-utf-8
5161
5162         The category for a coding system which has the same code range
5163         as UTF-8 (cf. RFC2279).  Assigned the coding-system (Lisp
5164         symbol) `utf-8' by default.
5165
5166    o coding-category-utf-16-be
5167
5168         The category for a coding system in which a text has an
5169         Unicode signature (cf. Unicode Standard) in the order of BIG
5170         endian at the head.  Assigned the coding-system (Lisp symbol)
5171         `utf-16-be' by default.
5172
5173    o coding-category-utf-16-le
5174
5175         The category for a coding system in which a text has an
5176         Unicode signature (cf. Unicode Standard) in the order of
5177         LITTLE endian at the head.  Assigned the coding-system (Lisp
5178         symbol) `utf-16-le' by default.
5179
5180    o coding-category-ccl
5181
5182         The category for a coding system of which encoder/decoder is
5183         written in CCL programs.  The default value is nil, i.e., no
5184         coding system is assigned.
5185
5186    o coding-category-binary
5187
5188         The category for a coding system not categorized in any of the
5189         above.  Assigned the coding-system (Lisp symbol)
5190         `no-conversion' by default.
5191
5192    Each of them is a Lisp symbol and the value is an actual
5193    `coding-system's (this is also a Lisp symbol) assigned by a user.
5194    What Emacs does actually is to detect a category of coding system.
5195    Then, it uses a `coding-system' assigned to it.  If Emacs can't
5196    decide only one possible category, it selects a category of the
5197    highest priority.  Priorities of categories are also specified by a
5198    user in a Lisp variable `coding-category-list'.
5199
5200 */
5201
5202 #define EOL_SEEN_NONE   0
5203 #define EOL_SEEN_LF     1
5204 #define EOL_SEEN_CR     2
5205 #define EOL_SEEN_CRLF   4
5206
5207 /* Detect how end-of-line of a text of length SRC_BYTES pointed by
5208    SOURCE is encoded.  If CATEGORY is one of
5209    coding_category_utf_16_XXXX, assume that CR and LF are encoded by
5210    two-byte, else they are encoded by one-byte.
5211
5212    Return one of EOL_SEEN_XXX.  */
5213
5214 #define MAX_EOL_CHECK_COUNT 3
5215
5216 static int
5217 detect_eol (source, src_bytes, category)
5218      unsigned char *source;
5219      EMACS_INT src_bytes;
5220      enum coding_category category;
5221 {
5222   unsigned char *src = source, *src_end = src + src_bytes;
5223   unsigned char c;
5224   int total  = 0;
5225   int eol_seen = EOL_SEEN_NONE;
5226
5227   if ((1 << category) & CATEGORY_MASK_UTF_16)
5228     {
5229       int msb, lsb;
5230
5231       msb = category == (coding_category_utf_16_le
5232                          | coding_category_utf_16_le_nosig);
5233       lsb = 1 - msb;
5234
5235       while (src + 1 < src_end)
5236         {
5237           c = src[lsb];
5238           if (src[msb] == 0 && (c == '\n' || c == '\r'))
5239             {
5240               int this_eol;
5241
5242               if (c == '\n')
5243                 this_eol = EOL_SEEN_LF;
5244               else if (src + 3 >= src_end
5245                        || src[msb + 2] != 0
5246                        || src[lsb + 2] != '\n')
5247                 this_eol = EOL_SEEN_CR;
5248               else
5249                 this_eol = EOL_SEEN_CRLF;
5250
5251               if (eol_seen == EOL_SEEN_NONE)
5252                 /* This is the first end-of-line.  */
5253                 eol_seen = this_eol;
5254               else if (eol_seen != this_eol)
5255                 {
5256                   /* The found type is different from what found before.  */
5257                   eol_seen = EOL_SEEN_LF;
5258                   break;
5259                 }
5260               if (++total == MAX_EOL_CHECK_COUNT)
5261                 break;
5262             }
5263           src += 2;
5264         }
5265     }
5266   else
5267     {
5268       while (src < src_end)
5269         {
5270           c = *src++;
5271           if (c == '\n' || c == '\r')
5272             {
5273               int this_eol;
5274
5275               if (c == '\n')
5276                 this_eol = EOL_SEEN_LF;
5277               else if (src >= src_end || *src != '\n')
5278                 this_eol = EOL_SEEN_CR;
5279               else
5280                 this_eol = EOL_SEEN_CRLF, src++;
5281
5282               if (eol_seen == EOL_SEEN_NONE)
5283                 /* This is the first end-of-line.  */
5284                 eol_seen = this_eol;
5285               else if (eol_seen != this_eol)
5286                 {
5287                   /* The found type is different from what found before.  */
5288                   eol_seen = EOL_SEEN_LF;
5289                   break;
5290                 }
5291               if (++total == MAX_EOL_CHECK_COUNT)
5292                 break;
5293             }
5294         }
5295     }
5296   return eol_seen;
5297 }
5298
5299
5300 static Lisp_Object
5301 adjust_coding_eol_type (coding, eol_seen)
5302      struct coding_system *coding;
5303      int eol_seen;
5304 {
5305   Lisp_Object eol_type;
5306
5307   eol_type = CODING_ID_EOL_TYPE (coding->id);
5308   if (eol_seen & EOL_SEEN_LF)
5309     {
5310       coding->id = CODING_SYSTEM_ID (AREF (eol_type, 0));
5311       eol_type = Qunix;
5312     }
5313   else if (eol_seen & EOL_SEEN_CRLF)
5314     {
5315       coding->id = CODING_SYSTEM_ID (AREF (eol_type, 1));
5316       eol_type = Qdos;
5317     }
5318   else if (eol_seen & EOL_SEEN_CR)
5319     {
5320       coding->id = CODING_SYSTEM_ID (AREF (eol_type, 2));
5321       eol_type = Qmac;
5322     }
5323   return eol_type;
5324 }
5325
5326 /* Detect how a text specified in CODING is encoded.  If a coding
5327    system is detected, update fields of CODING by the detected coding
5328    system.  */
5329
5330 void
5331 detect_coding (coding)
5332      struct coding_system *coding;
5333 {
5334   const unsigned char *src, *src_end;
5335   Lisp_Object attrs, coding_type;
5336
5337   coding->consumed = coding->consumed_char = 0;
5338   coding->produced = coding->produced_char = 0;
5339   coding_set_source (coding);
5340
5341   src_end = coding->source + coding->src_bytes;
5342
5343   /* If we have not yet decided the text encoding type, detect it
5344      now.  */
5345   if (EQ (CODING_ATTR_TYPE (CODING_ID_ATTRS (coding->id)), Qundecided))
5346     {
5347       int c, i;
5348
5349       for (i = 0, src = coding->source; src < src_end; i++, src++)
5350         {
5351           c = *src;
5352           if (c & 0x80 || (c < 0x20 && (c == 0
5353                                         || c == ISO_CODE_ESC
5354                                         || c == ISO_CODE_SI
5355                                         || c == ISO_CODE_SO)))
5356             break;
5357         }
5358       coding->head_ascii = src - (coding->source + coding->consumed);
5359
5360       if (coding->head_ascii < coding->src_bytes)
5361         {
5362           struct coding_detection_info detect_info;
5363           enum coding_category category;
5364           struct coding_system *this;
5365
5366           detect_info.checked = detect_info.found = detect_info.rejected = 0;
5367           for (i = 0; i < coding_category_raw_text; i++)
5368             {
5369               category = coding_priorities[i];
5370               this = coding_categories + category;
5371               if (this->id < 0)
5372                 {
5373                   /* No coding system of this category is defined.  */
5374                   detect_info.rejected |= (1 << category);
5375                 }
5376               else if (category >= coding_category_raw_text)
5377                 continue;
5378               else if (detect_info.checked & (1 << category))
5379                 {
5380                   if (detect_info.found & (1 << category))
5381                     break;
5382                 }
5383               else if ((*(this->detector)) (coding, &detect_info)
5384                        && detect_info.found & (1 << category))
5385                 {
5386                   if (category == coding_category_utf_16_auto)
5387                     {
5388                       if (detect_info.found & CATEGORY_MASK_UTF_16_LE)
5389                         category = coding_category_utf_16_le;
5390                       else
5391                         category = coding_category_utf_16_be;
5392                     }
5393                   break;
5394                 }
5395             }
5396           if (i < coding_category_raw_text)
5397             setup_coding_system (CODING_ID_NAME (this->id), coding);
5398           else if (detect_info.rejected == CATEGORY_MASK_ANY)
5399             setup_coding_system (Qraw_text, coding);
5400           else if (detect_info.rejected)
5401             for (i = 0; i < coding_category_raw_text; i++)
5402               if (! (detect_info.rejected & (1 << coding_priorities[i])))
5403                 {
5404                   this = coding_categories + coding_priorities[i];
5405                   setup_coding_system (CODING_ID_NAME (this->id), coding);
5406                   break;
5407                 }
5408         }
5409     }
5410   else if (XINT (CODING_ATTR_CATEGORY (CODING_ID_ATTRS (coding->id)))
5411            == coding_category_utf_16_auto)
5412     {
5413       Lisp_Object coding_systems;
5414       struct coding_detection_info detect_info;
5415
5416       coding_systems
5417         = AREF (CODING_ID_ATTRS (coding->id), coding_attr_utf_16_bom);
5418       detect_info.found = detect_info.rejected = 0;
5419       if (CONSP (coding_systems)
5420           && detect_coding_utf_16 (coding, &detect_info))
5421         {
5422           if (detect_info.found & CATEGORY_MASK_UTF_16_LE)
5423             setup_coding_system (XCAR (coding_systems), coding);
5424           else if (detect_info.found & CATEGORY_MASK_UTF_16_BE)
5425             setup_coding_system (XCDR (coding_systems), coding);
5426         }
5427     }
5428 }
5429
5430
5431 static void
5432 decode_eol (coding)
5433      struct coding_system *coding;
5434 {
5435   Lisp_Object eol_type;
5436   unsigned char *p, *pbeg, *pend;
5437
5438   eol_type = CODING_ID_EOL_TYPE (coding->id);
5439   if (EQ (eol_type, Qunix))
5440     return;
5441
5442   if (NILP (coding->dst_object))
5443     pbeg = coding->destination;
5444   else
5445     pbeg = BYTE_POS_ADDR (coding->dst_pos_byte);
5446   pend = pbeg + coding->produced;
5447
5448   if (VECTORP (eol_type))
5449     {
5450       int eol_seen = EOL_SEEN_NONE;
5451
5452       for (p = pbeg; p < pend; p++)
5453         {
5454           if (*p == '\n')
5455             eol_seen |= EOL_SEEN_LF;
5456           else if (*p == '\r')
5457             {
5458               if (p + 1 < pend && *(p + 1) == '\n')
5459                 {
5460                   eol_seen |= EOL_SEEN_CRLF;
5461                   p++;
5462                 }
5463               else
5464                 eol_seen |= EOL_SEEN_CR;
5465             }
5466         }
5467       if (eol_seen != EOL_SEEN_NONE
5468           && eol_seen != EOL_SEEN_LF
5469           && eol_seen != EOL_SEEN_CRLF
5470           && eol_seen != EOL_SEEN_CR)
5471         eol_seen = EOL_SEEN_LF;
5472       if (eol_seen != EOL_SEEN_NONE)
5473         eol_type = adjust_coding_eol_type (coding, eol_seen);
5474     }
5475
5476   if (EQ (eol_type, Qmac))
5477     {
5478       for (p = pbeg; p < pend; p++)
5479         if (*p == '\r')
5480           *p = '\n';
5481     }
5482   else if (EQ (eol_type, Qdos))
5483     {
5484       int n = 0;
5485
5486       if (NILP (coding->dst_object))
5487         {
5488           for (p = pend - 2; p >= pbeg; p--)
5489             if (*p == '\r')
5490               {
5491                 safe_bcopy ((char *) (p + 1), (char *) p, pend-- - p - 1);
5492                 n++;
5493               }
5494         }
5495       else
5496         {
5497           for (p = pend - 2; p >= pbeg; p--)
5498             if (*p == '\r')
5499               {
5500                 int pos_byte = coding->dst_pos_byte + (p - pbeg);
5501                 int pos = BYTE_TO_CHAR (pos_byte);
5502
5503                 del_range_2 (pos, pos_byte, pos + 1, pos_byte + 1, 0);
5504                 n++;
5505               }
5506         }
5507       coding->produced -= n;
5508       coding->produced_char -= n;
5509     }
5510 }
5511
5512
5513 /* Return a translation table (or list of them) from coding system
5514    attribute vector ATTRS for encoding (ENCODEP is nonzero) or
5515    decoding (ENCODEP is zero). */
5516
5517 static INLINE
5518 get_translation_table (attrs, encodep)
5519 {
5520   Lisp_Object standard, translation_table;
5521
5522   if (encodep)
5523     translation_table = CODING_ATTR_ENCODE_TBL (attrs),
5524       standard = Vstandard_translation_table_for_encode;
5525   else
5526     translation_table = CODING_ATTR_DECODE_TBL (attrs),
5527       standard = Vstandard_translation_table_for_decode;
5528   if (NILP (translation_table))
5529     return standard;
5530   if (SYMBOLP (translation_table))
5531     translation_table = Fget (translation_table, Qtranslation_table);
5532   else if (CONSP (translation_table))
5533     {
5534       Lisp_Object val;
5535
5536       translation_table = Fcopy_sequence (translation_table);
5537       for (val = translation_table; CONSP (val); val = XCDR (val))
5538         if (SYMBOLP (XCAR (val)))
5539           XSETCAR (val, Fget (XCAR (val), Qtranslation_table));
5540     }
5541   if (! NILP (standard))
5542     {
5543       if (CONSP (translation_table))
5544         translation_table = nconc2 (translation_table, Fcons (standard, Qnil));
5545       else
5546         translation_table = Fcons (translation_table, Fcons (standard, Qnil));
5547     }
5548   return translation_table;
5549 }
5550
5551
5552 static void
5553 translate_chars (coding, table)
5554      struct coding_system *coding;
5555      Lisp_Object table;
5556 {
5557   int *charbuf = coding->charbuf;
5558   int *charbuf_end = charbuf + coding->charbuf_used;
5559   int c;
5560
5561   if (coding->chars_at_source)
5562     return;
5563
5564   while (charbuf < charbuf_end)
5565     {
5566       c = *charbuf;
5567       if (c < 0)
5568         charbuf += -c;
5569       else
5570         *charbuf++ = translate_char (table, c);
5571     }
5572 }
5573
5574 static int
5575 produce_chars (coding)
5576      struct coding_system *coding;
5577 {
5578   unsigned char *dst = coding->destination + coding->produced;
5579   unsigned char *dst_end = coding->destination + coding->dst_bytes;
5580   int produced;
5581   int produced_chars = 0;
5582
5583   if (! coding->chars_at_source)
5584     {
5585       /* Characters are in coding->charbuf.  */
5586       int *buf = coding->charbuf;
5587       int *buf_end = buf + coding->charbuf_used;
5588       unsigned char *adjusted_dst_end;
5589
5590       if (BUFFERP (coding->src_object)
5591           && EQ (coding->src_object, coding->dst_object))
5592         dst_end = ((unsigned char *) coding->source) + coding->consumed;
5593       adjusted_dst_end = dst_end - MAX_MULTIBYTE_LENGTH;
5594
5595       while (buf < buf_end)
5596         {
5597           int c = *buf++;
5598
5599           if (dst >= adjusted_dst_end)
5600             {
5601               dst = alloc_destination (coding,
5602                                        buf_end - buf + MAX_MULTIBYTE_LENGTH,
5603                                        dst);
5604               dst_end = coding->destination + coding->dst_bytes;
5605               adjusted_dst_end = dst_end - MAX_MULTIBYTE_LENGTH;
5606             }
5607           if (c >= 0)
5608             {
5609               if (coding->dst_multibyte
5610                   || ! CHAR_BYTE8_P (c))
5611                 CHAR_STRING_ADVANCE (c, dst);
5612               else
5613                 *dst++ = CHAR_TO_BYTE8 (c);
5614               produced_chars++;
5615             }
5616           else
5617             /* This is an annotation datum.  (-C) is the length of
5618                it.  */
5619             buf += -c - 1;
5620         }
5621     }
5622   else
5623     {
5624       const unsigned char *src = coding->source;
5625       const unsigned char *src_end = src + coding->src_bytes;
5626       Lisp_Object eol_type;
5627
5628       eol_type = CODING_ID_EOL_TYPE (coding->id);
5629
5630       if (coding->src_multibyte != coding->dst_multibyte)
5631         {
5632           if (coding->src_multibyte)
5633             {
5634               int multibytep = 1;
5635               int consumed_chars;
5636
5637               while (1)
5638                 {
5639                   const unsigned char *src_base = src;
5640                   int c;
5641
5642                   ONE_MORE_BYTE (c);
5643                   if (c == '\r')
5644                     {
5645                       if (EQ (eol_type, Qdos))
5646                         {
5647                           if (src == src_end)
5648                             {
5649                               record_conversion_result
5650                                 (coding, CODING_RESULT_INSUFFICIENT_SRC);
5651                               goto no_more_source;
5652                             }
5653                           if (*src == '\n')
5654                             c = *src++;
5655                         }
5656                       else if (EQ (eol_type, Qmac))
5657                         c = '\n';
5658                     }
5659                   if (dst == dst_end)
5660                     {
5661                       coding->consumed = src - coding->source;
5662
5663                     if (EQ (coding->src_object, coding->dst_object))
5664                       dst_end = (unsigned char *) src;
5665                     if (dst == dst_end)
5666                       {
5667                         dst = alloc_destination (coding, src_end - src + 1,
5668                                                  dst);
5669                         dst_end = coding->destination + coding->dst_bytes;
5670                         coding_set_source (coding);
5671                         src = coding->source + coding->consumed;
5672                         src_end = coding->source + coding->src_bytes;
5673                       }
5674                     }
5675                   *dst++ = c;
5676                   produced_chars++;
5677                 }
5678             no_more_source:
5679               ;
5680             }
5681           else
5682             while (src < src_end)
5683               {
5684                 int multibytep = 1;
5685                 int c = *src++;
5686
5687                 if (c == '\r')
5688                   {
5689                     if (EQ (eol_type, Qdos))
5690                       {
5691                         if (src < src_end
5692                             && *src == '\n')
5693                           c = *src++;
5694                       }
5695                     else if (EQ (eol_type, Qmac))
5696                       c = '\n';
5697                   }
5698                 if (dst >= dst_end - 1)
5699                   {
5700                     coding->consumed = src - coding->source;
5701
5702                     if (EQ (coding->src_object, coding->dst_object))
5703                       dst_end = (unsigned char *) src;
5704                     if (dst >= dst_end - 1)
5705                       {
5706                         dst = alloc_destination (coding, src_end - src + 2,
5707                                                  dst);
5708                         dst_end = coding->destination + coding->dst_bytes;
5709                         coding_set_source (coding);
5710                         src = coding->source + coding->consumed;
5711                         src_end = coding->source + coding->src_bytes;
5712                       }
5713                   }
5714                 EMIT_ONE_BYTE (c);
5715               }
5716         }
5717       else
5718         {
5719           if (!EQ (coding->src_object, coding->dst_object))
5720             {
5721               int require = coding->src_bytes - coding->dst_bytes;
5722
5723               if (require > 0)
5724                 {
5725                   EMACS_INT offset = src - coding->source;
5726
5727                   dst = alloc_destination (coding, require, dst);
5728                   coding_set_source (coding);
5729                   src = coding->source + offset;
5730                   src_end = coding->source + coding->src_bytes;
5731                 }
5732             }
5733           produced_chars = coding->src_chars;
5734           while (src < src_end)
5735             {
5736               int c = *src++;
5737
5738               if (c == '\r')
5739                 {
5740                   if (EQ (eol_type, Qdos))
5741                     {
5742                       if (src < src_end
5743                           && *src == '\n')
5744                         c = *src++;
5745                       produced_chars--;
5746                     }
5747                   else if (EQ (eol_type, Qmac))
5748                     c = '\n';
5749                 }
5750               *dst++ = c;
5751             }
5752         }
5753       coding->consumed = coding->src_bytes;
5754       coding->consumed_char = coding->src_chars;
5755     }
5756
5757   produced = dst - (coding->destination + coding->produced);
5758   if (BUFFERP (coding->dst_object))
5759     insert_from_gap (produced_chars, produced);
5760   coding->produced += produced;
5761   coding->produced_char += produced_chars;
5762   return produced_chars;
5763 }
5764
5765 /* Compose text in CODING->object according to the annotation data at
5766    CHARBUF.  CHARBUF is an array:
5767      [ -LENGTH ANNOTATION_MASK FROM TO METHOD COMP_LEN [ COMPONENTS... ] ]
5768  */
5769
5770 static INLINE void
5771 produce_composition (coding, charbuf)
5772      struct coding_system *coding;
5773      int *charbuf;
5774 {
5775   int len;
5776   EMACS_INT from, to;
5777   enum composition_method method;
5778   Lisp_Object components;
5779
5780   len = -charbuf[0];
5781   from = coding->dst_pos + charbuf[2];
5782   to = coding->dst_pos + charbuf[3];
5783   method = (enum composition_method) (charbuf[4]);
5784
5785   if (method == COMPOSITION_RELATIVE)
5786     components = Qnil;
5787   else
5788     {
5789       Lisp_Object args[MAX_COMPOSITION_COMPONENTS * 2 - 1];
5790       int i;
5791
5792       len -= 5;
5793       charbuf += 5;
5794       for (i = 0; i < len; i++)
5795         args[i] = make_number (charbuf[i]);
5796       components = (method == COMPOSITION_WITH_ALTCHARS
5797                     ? Fstring (len, args) : Fvector (len, args));
5798     }
5799   compose_text (from, to, components, Qnil, coding->dst_object);
5800 }
5801
5802
5803 /* Put `charset' property on text in CODING->object according to
5804    the annotation data at CHARBUF.  CHARBUF is an array:
5805      [ -LENGTH ANNOTATION_MASK FROM TO CHARSET-ID ]
5806  */
5807
5808 static INLINE void
5809 produce_charset (coding, charbuf)
5810      struct coding_system *coding;
5811      int *charbuf;
5812 {
5813   EMACS_INT from = coding->dst_pos + charbuf[2];
5814   EMACS_INT to = coding->dst_pos + charbuf[3];
5815   struct charset *charset = CHARSET_FROM_ID (charbuf[4]);
5816
5817   Fput_text_property (make_number (from), make_number (to),
5818                       Qcharset, CHARSET_NAME (charset),
5819                       coding->dst_object);
5820 }
5821
5822
5823 #define CHARBUF_SIZE 0x4000
5824
5825 #define ALLOC_CONVERSION_WORK_AREA(coding)                              \
5826   do {                                                                  \
5827     int size = CHARBUF_SIZE;;                                           \
5828                                                                         \
5829     coding->charbuf = NULL;                                             \
5830     while (size > 1024)                                                 \
5831       {                                                                 \
5832         coding->charbuf = (int *) alloca (sizeof (int) * size);         \
5833         if (coding->charbuf)                                            \
5834           break;                                                        \
5835         size >>= 1;                                                     \
5836       }                                                                 \
5837     if (! coding->charbuf)                                              \
5838       {                                                                 \
5839         record_conversion_result (coding, CODING_RESULT_INSUFFICIENT_MEM); \
5840         return coding->result;                                          \
5841       }                                                                 \
5842     coding->charbuf_size = size;                                        \
5843   } while (0)
5844
5845
5846 static void
5847 produce_annotation (coding)
5848      struct coding_system *coding;
5849 {
5850   int *charbuf = coding->charbuf;
5851   int *charbuf_end = charbuf + coding->charbuf_used;
5852
5853   if (NILP (coding->dst_object))
5854     return;
5855
5856   while (charbuf < charbuf_end)
5857     {
5858       if (*charbuf >= 0)
5859         charbuf++;
5860       else
5861         {
5862           int len = -*charbuf;
5863           switch (charbuf[1])
5864             {
5865             case CODING_ANNOTATE_COMPOSITION_MASK:
5866               produce_composition (coding, charbuf);
5867               break;
5868             case CODING_ANNOTATE_CHARSET_MASK:
5869               produce_charset (coding, charbuf);
5870               break;
5871             default:
5872               abort ();
5873             }
5874           charbuf += len;
5875         }
5876     }
5877 }
5878
5879 /* Decode the data at CODING->src_object into CODING->dst_object.
5880    CODING->src_object is a buffer, a string, or nil.
5881    CODING->dst_object is a buffer.
5882
5883    If CODING->src_object is a buffer, it must be the current buffer.
5884    In this case, if CODING->src_pos is positive, it is a position of
5885    the source text in the buffer, otherwise, the source text is in the
5886    gap area of the buffer, and CODING->src_pos specifies the offset of
5887    the text from GPT (which must be the same as PT).  If this is the
5888    same buffer as CODING->dst_object, CODING->src_pos must be
5889    negative.
5890
5891    If CODING->src_object is a string, CODING->src_pos in an index to
5892    that string.
5893
5894    If CODING->src_object is nil, CODING->source must already point to
5895    the non-relocatable memory area.  In this case, CODING->src_pos is
5896    an offset from CODING->source.
5897
5898    The decoded data is inserted at the current point of the buffer
5899    CODING->dst_object.
5900 */
5901
5902 static int
5903 decode_coding (coding)
5904      struct coding_system *coding;
5905 {
5906   Lisp_Object attrs;
5907   Lisp_Object undo_list;
5908   Lisp_Object translation_table;
5909
5910   if (BUFFERP (coding->src_object)
5911       && coding->src_pos > 0
5912       && coding->src_pos < GPT
5913       && coding->src_pos + coding->src_chars > GPT)
5914     move_gap_both (coding->src_pos, coding->src_pos_byte);
5915
5916   undo_list = Qt;
5917   if (BUFFERP (coding->dst_object))
5918     {
5919       if (current_buffer != XBUFFER (coding->dst_object))
5920         set_buffer_internal (XBUFFER (coding->dst_object));
5921       if (GPT != PT)
5922         move_gap_both (PT, PT_BYTE);
5923       undo_list = current_buffer->undo_list;
5924       current_buffer->undo_list = Qt;
5925     }
5926
5927   coding->consumed = coding->consumed_char = 0;
5928   coding->produced = coding->produced_char = 0;
5929   coding->chars_at_source = 0;
5930   record_conversion_result (coding, CODING_RESULT_SUCCESS);
5931   coding->errors = 0;
5932
5933   ALLOC_CONVERSION_WORK_AREA (coding);
5934
5935   attrs = CODING_ID_ATTRS (coding->id);
5936   translation_table = get_translation_table (attrs, 0);
5937
5938   do
5939     {
5940       coding_set_source (coding);
5941       coding->annotated = 0;
5942       (*(coding->decoder)) (coding);
5943       if (!NILP (translation_table))
5944         translate_chars (coding, translation_table);
5945       coding_set_destination (coding);
5946       produce_chars (coding);
5947       if (coding->annotated)
5948         produce_annotation (coding);
5949     }
5950   while (coding->consumed < coding->src_bytes
5951          && ! coding->result);
5952
5953   coding->carryover_bytes = 0;
5954   if (coding->consumed < coding->src_bytes)
5955     {
5956       int nbytes = coding->src_bytes - coding->consumed;
5957       const unsigned char *src;
5958
5959       coding_set_source (coding);
5960       coding_set_destination (coding);
5961       src = coding->source + coding->consumed;
5962
5963       if (coding->mode & CODING_MODE_LAST_BLOCK)
5964         {
5965           /* Flush out unprocessed data as binary chars.  We are sure
5966              that the number of data is less than the size of
5967              coding->charbuf.  */
5968           coding->charbuf_used = 0;
5969           while (nbytes-- > 0)
5970             {
5971               int c = *src++;
5972
5973               coding->charbuf[coding->charbuf_used++] = (c & 0x80 ? - c : c);
5974             }
5975           produce_chars (coding);
5976         }
5977       else
5978         {
5979           /* Record unprocessed bytes in coding->carryover.  We are
5980              sure that the number of data is less than the size of
5981              coding->carryover.  */
5982           unsigned char *p = coding->carryover;
5983
5984           coding->carryover_bytes = nbytes;
5985           while (nbytes-- > 0)
5986             *p++ = *src++;
5987         }
5988       coding->consumed = coding->src_bytes;
5989     }
5990
5991   if (BUFFERP (coding->dst_object))
5992     {
5993       current_buffer->undo_list = undo_list;
5994       record_insert (coding->dst_pos, coding->produced_char);
5995     }
5996   if (! EQ (CODING_ID_EOL_TYPE (coding->id), Qunix))
5997     decode_eol (coding);
5998   return coding->result;
5999 }
6000
6001
6002 /* Extract an annotation datum from a composition starting at POS and
6003    ending before LIMIT of CODING->src_object (buffer or string), store
6004    the data in BUF, set *STOP to a starting position of the next
6005    composition (if any) or to LIMIT, and return the address of the
6006    next element of BUF.
6007
6008    If such an annotation is not found, set *STOP to a starting
6009    position of a composition after POS (if any) or to LIMIT, and
6010    return BUF.  */
6011
6012 static INLINE int *
6013 handle_composition_annotation (pos, limit, coding, buf, stop)
6014      EMACS_INT pos, limit;
6015      struct coding_system *coding;
6016      int *buf;
6017      EMACS_INT *stop;
6018 {
6019   EMACS_INT start, end;
6020   Lisp_Object prop;
6021
6022   if (! find_composition (pos, limit, &start, &end, &prop, coding->src_object)
6023       || end > limit)
6024     *stop = limit;
6025   else if (start > pos)
6026     *stop = start;
6027   else
6028     {
6029       if (start == pos)
6030         {
6031           /* We found a composition.  Store the corresponding
6032              annotation data in BUF.  */
6033           int *head = buf;
6034           enum composition_method method = COMPOSITION_METHOD (prop);
6035           int nchars = COMPOSITION_LENGTH (prop);
6036
6037           ADD_COMPOSITION_DATA (buf, 0, nchars, method);
6038           if (method != COMPOSITION_RELATIVE)
6039             {
6040               Lisp_Object components;
6041               int len, i, i_byte;
6042
6043               components = COMPOSITION_COMPONENTS (prop);
6044               if (VECTORP (components))
6045                 {
6046                   len = XVECTOR (components)->size;
6047                   for (i = 0; i < len; i++)
6048                     *buf++ = XINT (AREF (components, i));
6049                 }
6050               else if (STRINGP (components))
6051                 {
6052                   len = SCHARS (components);
6053                   i = i_byte = 0;
6054                   while (i < len)
6055                     {
6056                       FETCH_STRING_CHAR_ADVANCE (*buf, components, i, i_byte);
6057                       buf++;
6058                     }
6059                 }
6060               else if (INTEGERP (components))
6061                 {
6062                   len = 1;
6063                   *buf++ = XINT (components);
6064                 }
6065               else if (CONSP (components))
6066                 {
6067                   for (len = 0; CONSP (components);
6068                        len++, components = XCDR (components))
6069                     *buf++ = XINT (XCAR (components));
6070                 }
6071               else
6072                 abort ();
6073               *head -= len;
6074             }
6075         }
6076
6077       if (find_composition (end, limit, &start, &end, &prop,
6078                             coding->src_object)
6079           && end <= limit)
6080         *stop = start;
6081       else
6082         *stop = limit;
6083     }
6084   return buf;
6085 }
6086
6087
6088 /* Extract an annotation datum from a text property `charset' at POS of
6089    CODING->src_object (buffer of string), store the data in BUF, set
6090    *STOP to the position where the value of `charset' property changes
6091    (limiting by LIMIT), and return the address of the next element of
6092    BUF.
6093
6094    If the property value is nil, set *STOP to the position where the
6095    property value is non-nil (limiting by LIMIT), and return BUF.  */
6096
6097 static INLINE int *
6098 handle_charset_annotation (pos, limit, coding, buf, stop)
6099      EMACS_INT pos, limit;
6100      struct coding_system *coding;
6101      int *buf;
6102      EMACS_INT *stop;
6103 {
6104   Lisp_Object val, next;
6105   int id;
6106
6107   val = Fget_text_property (make_number (pos), Qcharset, coding->src_object);
6108   if (! NILP (val) && CHARSETP (val))
6109     id = XINT (CHARSET_SYMBOL_ID (val));
6110   else
6111     id = -1;
6112   ADD_CHARSET_DATA (buf, 0, 0, id);
6113   next = Fnext_single_property_change (make_number (pos), Qcharset,
6114                                        coding->src_object,
6115                                        make_number (limit));
6116   *stop = XINT (next);
6117   return buf;
6118 }
6119
6120
6121 static void
6122 consume_chars (coding)
6123      struct coding_system *coding;
6124 {
6125   int *buf = coding->charbuf;
6126   int *buf_end = coding->charbuf + coding->charbuf_size;
6127   const unsigned char *src = coding->source + coding->consumed;
6128   const unsigned char *src_end = coding->source + coding->src_bytes;
6129   EMACS_INT pos = coding->src_pos + coding->consumed_char;
6130   EMACS_INT end_pos = coding->src_pos + coding->src_chars;
6131   int multibytep = coding->src_multibyte;
6132   Lisp_Object eol_type;
6133   int c;
6134   EMACS_INT stop, stop_composition, stop_charset;
6135
6136   eol_type = CODING_ID_EOL_TYPE (coding->id);
6137   if (VECTORP (eol_type))
6138     eol_type = Qunix;
6139
6140   /* Note: composition handling is not yet implemented.  */
6141   coding->common_flags &= ~CODING_ANNOTATE_COMPOSITION_MASK;
6142
6143   if (NILP (coding->src_object))
6144     stop = stop_composition = stop_charset = end_pos;
6145   else
6146     {
6147       if (coding->common_flags & CODING_ANNOTATE_COMPOSITION_MASK)
6148         stop = stop_composition = pos;
6149       else
6150         stop = stop_composition = end_pos;
6151       if (coding->common_flags & CODING_ANNOTATE_CHARSET_MASK)
6152         stop = stop_charset = pos;
6153       else
6154         stop_charset = end_pos;
6155     }
6156
6157   /* Compensate for CRLF and conversion.  */
6158   buf_end -= 1 + MAX_ANNOTATION_LENGTH;
6159   while (buf < buf_end)
6160     {
6161       if (pos == stop)
6162         {
6163           if (pos == end_pos)
6164             break;
6165           if (pos == stop_composition)
6166             buf = handle_composition_annotation (pos, end_pos, coding,
6167                                                  buf, &stop_composition);
6168           if (pos == stop_charset)
6169             buf = handle_charset_annotation (pos, end_pos, coding,
6170                                              buf, &stop_charset);
6171           stop = (stop_composition < stop_charset
6172                   ? stop_composition : stop_charset);
6173         }
6174
6175       if (! multibytep)
6176         {
6177           EMACS_INT bytes;
6178
6179           if (! CODING_FOR_UNIBYTE (coding)
6180               && (bytes = MULTIBYTE_LENGTH (src, src_end)) > 0)
6181             c = STRING_CHAR_ADVANCE (src), pos += bytes;
6182           else
6183             c = *src++, pos++;
6184         }
6185       else
6186         c = STRING_CHAR_ADVANCE (src), pos++;
6187       if ((c == '\r') && (coding->mode & CODING_MODE_SELECTIVE_DISPLAY))
6188         c = '\n';
6189       if (! EQ (eol_type, Qunix))
6190         {
6191           if (c == '\n')
6192             {
6193               if (EQ (eol_type, Qdos))
6194                 *buf++ = '\r';
6195               else
6196                 c = '\r';
6197             }
6198         }
6199       *buf++ = c;
6200     }
6201
6202   coding->consumed = src - coding->source;
6203   coding->consumed_char = pos - coding->src_pos;
6204   coding->charbuf_used = buf - coding->charbuf;
6205   coding->chars_at_source = 0;
6206 }
6207
6208
6209 /* Encode the text at CODING->src_object into CODING->dst_object.
6210    CODING->src_object is a buffer or a string.
6211    CODING->dst_object is a buffer or nil.
6212
6213    If CODING->src_object is a buffer, it must be the current buffer.
6214    In this case, if CODING->src_pos is positive, it is a position of
6215    the source text in the buffer, otherwise. the source text is in the
6216    gap area of the buffer, and coding->src_pos specifies the offset of
6217    the text from GPT (which must be the same as PT).  If this is the
6218    same buffer as CODING->dst_object, CODING->src_pos must be
6219    negative and CODING should not have `pre-write-conversion'.
6220
6221    If CODING->src_object is a string, CODING should not have
6222    `pre-write-conversion'.
6223
6224    If CODING->dst_object is a buffer, the encoded data is inserted at
6225    the current point of that buffer.
6226
6227    If CODING->dst_object is nil, the encoded data is placed at the
6228    memory area specified by CODING->destination.  */
6229
6230 static int
6231 encode_coding (coding)
6232      struct coding_system *coding;
6233 {
6234   Lisp_Object attrs;
6235   Lisp_Object translation_table;
6236
6237   attrs = CODING_ID_ATTRS (coding->id);
6238   translation_table = get_translation_table (attrs, 1);
6239
6240   if (BUFFERP (coding->dst_object))
6241     {
6242       set_buffer_internal (XBUFFER (coding->dst_object));
6243       coding->dst_multibyte
6244         = ! NILP (current_buffer->enable_multibyte_characters);
6245     }
6246
6247   coding->consumed = coding->consumed_char = 0;
6248   coding->produced = coding->produced_char = 0;
6249   record_conversion_result (coding, CODING_RESULT_SUCCESS);
6250   coding->errors = 0;
6251
6252   ALLOC_CONVERSION_WORK_AREA (coding);
6253
6254   do {
6255     coding_set_source (coding);
6256     consume_chars (coding);
6257
6258     if (!NILP (translation_table))
6259       translate_chars (coding, translation_table);
6260
6261     coding_set_destination (coding);
6262     (*(coding->encoder)) (coding);
6263   } while (coding->consumed_char < coding->src_chars);
6264
6265   if (BUFFERP (coding->dst_object))
6266     insert_from_gap (coding->produced_char, coding->produced);
6267
6268   return (coding->result);
6269 }
6270
6271
6272 /* Name (or base name) of work buffer for code conversion.  */
6273 static Lisp_Object Vcode_conversion_workbuf_name;
6274
6275 /* A working buffer used by the top level conversion.  Once it is
6276    created, it is never destroyed.  It has the name
6277    Vcode_conversion_workbuf_name.  The other working buffers are
6278    destroyed after the use is finished, and their names are modified
6279    versions of Vcode_conversion_workbuf_name.  */
6280 static Lisp_Object Vcode_conversion_reused_workbuf;
6281
6282 /* 1 iff Vcode_conversion_reused_workbuf is already in use.  */
6283 static int reused_workbuf_in_use;
6284
6285
6286 /* Return a working buffer of code convesion.  MULTIBYTE specifies the
6287    multibyteness of returning buffer.  */
6288
6289 Lisp_Object
6290 make_conversion_work_buffer (multibyte)
6291 {
6292   Lisp_Object name, workbuf;
6293   struct buffer *current;
6294
6295   if (reused_workbuf_in_use++)
6296     {
6297       name = Fgenerate_new_buffer_name (Vcode_conversion_workbuf_name, Qnil);
6298       workbuf = Fget_buffer_create (name);
6299     }
6300   else
6301     {
6302       name = Vcode_conversion_workbuf_name;
6303       workbuf = Fget_buffer_create (name);
6304       if (NILP (Vcode_conversion_reused_workbuf))
6305         Vcode_conversion_reused_workbuf = workbuf;
6306     }
6307   current = current_buffer;
6308   set_buffer_internal (XBUFFER (workbuf));
6309   Ferase_buffer ();
6310   current_buffer->undo_list = Qt;
6311   current_buffer->enable_multibyte_characters = multibyte ? Qt : Qnil;
6312   set_buffer_internal (current);
6313   return workbuf;
6314 }
6315
6316
6317 static Lisp_Object
6318 code_conversion_restore (arg)
6319      Lisp_Object arg;
6320 {
6321   Lisp_Object current, workbuf;
6322
6323   current = XCAR (arg);
6324   workbuf = XCDR (arg);
6325   if (! NILP (workbuf))
6326     {
6327       if (EQ (workbuf, Vcode_conversion_reused_workbuf))
6328         reused_workbuf_in_use = 0;
6329       else if (! NILP (Fbuffer_live_p (workbuf)))
6330         Fkill_buffer (workbuf);
6331     }
6332   set_buffer_internal (XBUFFER (current));
6333   return Qnil;
6334 }
6335
6336 Lisp_Object
6337 code_conversion_save (with_work_buf, multibyte)
6338      int with_work_buf, multibyte;
6339 {
6340   Lisp_Object workbuf = Qnil;
6341
6342   if (with_work_buf)
6343     workbuf = make_conversion_work_buffer (multibyte);
6344   record_unwind_protect (code_conversion_restore,
6345                          Fcons (Fcurrent_buffer (), workbuf));
6346   return workbuf;
6347 }
6348
6349 int
6350 decode_coding_gap (coding, chars, bytes)
6351      struct coding_system *coding;
6352      EMACS_INT chars, bytes;
6353 {
6354   int count = specpdl_ptr - specpdl;
6355   Lisp_Object attrs;
6356
6357   code_conversion_save (0, 0);
6358
6359   coding->src_object = Fcurrent_buffer ();
6360   coding->src_chars = chars;
6361   coding->src_bytes = bytes;
6362   coding->src_pos = -chars;
6363   coding->src_pos_byte = -bytes;
6364   coding->src_multibyte = chars < bytes;
6365   coding->dst_object = coding->src_object;
6366   coding->dst_pos = PT;
6367   coding->dst_pos_byte = PT_BYTE;
6368   coding->dst_multibyte = ! NILP (current_buffer->enable_multibyte_characters);
6369   coding->mode |= CODING_MODE_LAST_BLOCK;
6370
6371   if (CODING_REQUIRE_DETECTION (coding))
6372     detect_coding (coding);
6373
6374   decode_coding (coding);
6375
6376   attrs = CODING_ID_ATTRS (coding->id);
6377   if (! NILP (CODING_ATTR_POST_READ (attrs)))
6378     {
6379       EMACS_INT prev_Z = Z, prev_Z_BYTE = Z_BYTE;
6380       Lisp_Object val;
6381
6382       TEMP_SET_PT_BOTH (coding->dst_pos, coding->dst_pos_byte);
6383       val = call1 (CODING_ATTR_POST_READ (attrs),
6384                    make_number (coding->produced_char));
6385       CHECK_NATNUM (val);
6386       coding->produced_char += Z - prev_Z;
6387       coding->produced += Z_BYTE - prev_Z_BYTE;
6388     }
6389
6390   unbind_to (count, Qnil);
6391   return coding->result;
6392 }
6393
6394 int
6395 encode_coding_gap (coding, chars, bytes)
6396      struct coding_system *coding;
6397      EMACS_INT chars, bytes;
6398 {
6399   int count = specpdl_ptr - specpdl;
6400
6401   code_conversion_save (0, 0);
6402
6403   coding->src_object = Fcurrent_buffer ();
6404   coding->src_chars = chars;
6405   coding->src_bytes = bytes;
6406   coding->src_pos = -chars;
6407   coding->src_pos_byte = -bytes;
6408   coding->src_multibyte = chars < bytes;
6409   coding->dst_object = coding->src_object;
6410   coding->dst_pos = PT;
6411   coding->dst_pos_byte = PT_BYTE;
6412
6413   encode_coding (coding);
6414
6415   unbind_to (count, Qnil);
6416   return coding->result;
6417 }
6418
6419
6420 /* Decode the text in the range FROM/FROM_BYTE and TO/TO_BYTE in
6421    SRC_OBJECT into DST_OBJECT by coding context CODING.
6422
6423    SRC_OBJECT is a buffer, a string, or Qnil.
6424
6425    If it is a buffer, the text is at point of the buffer.  FROM and TO
6426    are positions in the buffer.
6427
6428    If it is a string, the text is at the beginning of the string.
6429    FROM and TO are indices to the string.
6430
6431    If it is nil, the text is at coding->source.  FROM and TO are
6432    indices to coding->source.
6433
6434    DST_OBJECT is a buffer, Qt, or Qnil.
6435
6436    If it is a buffer, the decoded text is inserted at point of the
6437    buffer.  If the buffer is the same as SRC_OBJECT, the source text
6438    is deleted.
6439
6440    If it is Qt, a string is made from the decoded text, and
6441    set in CODING->dst_object.
6442
6443    If it is Qnil, the decoded text is stored at CODING->destination.
6444    The caller must allocate CODING->dst_bytes bytes at
6445    CODING->destination by xmalloc.  If the decoded text is longer than
6446    CODING->dst_bytes, CODING->destination is relocated by xrealloc.
6447  */
6448
6449 void
6450 decode_coding_object (coding, src_object, from, from_byte, to, to_byte,
6451                       dst_object)
6452      struct coding_system *coding;
6453      Lisp_Object src_object;
6454      EMACS_INT from, from_byte, to, to_byte;
6455      Lisp_Object dst_object;
6456 {
6457   int count = specpdl_ptr - specpdl;
6458   unsigned char *destination;
6459   EMACS_INT dst_bytes;
6460   EMACS_INT chars = to - from;
6461   EMACS_INT bytes = to_byte - from_byte;
6462   Lisp_Object attrs;
6463   Lisp_Object buffer;
6464   int saved_pt = -1, saved_pt_byte;
6465
6466   buffer = Fcurrent_buffer ();
6467
6468   if (NILP (dst_object))
6469     {
6470       destination = coding->destination;
6471       dst_bytes = coding->dst_bytes;
6472     }
6473
6474   coding->src_object = src_object;
6475   coding->src_chars = chars;
6476   coding->src_bytes = bytes;
6477   coding->src_multibyte = chars < bytes;
6478
6479   if (STRINGP (src_object))
6480     {
6481       coding->src_pos = from;
6482       coding->src_pos_byte = from_byte;
6483     }
6484   else if (BUFFERP (src_object))
6485     {
6486       set_buffer_internal (XBUFFER (src_object));
6487       if (from != GPT)
6488         move_gap_both (from, from_byte);
6489       if (EQ (src_object, dst_object))
6490         {
6491           saved_pt = PT, saved_pt_byte = PT_BYTE;
6492           TEMP_SET_PT_BOTH (from, from_byte);
6493           del_range_both (from, from_byte, to, to_byte, 1);
6494           coding->src_pos = -chars;
6495           coding->src_pos_byte = -bytes;
6496         }
6497       else
6498         {
6499           coding->src_pos = from;
6500           coding->src_pos_byte = from_byte;
6501         }
6502     }
6503
6504   if (CODING_REQUIRE_DETECTION (coding))
6505     detect_coding (coding);
6506   attrs = CODING_ID_ATTRS (coding->id);
6507
6508   if (EQ (dst_object, Qt)
6509       || (! NILP (CODING_ATTR_POST_READ (attrs))
6510           && NILP (dst_object)))
6511     {
6512       coding->dst_object = code_conversion_save (1, 1);
6513       coding->dst_pos = BEG;
6514       coding->dst_pos_byte = BEG_BYTE;
6515       coding->dst_multibyte = 1;
6516     }
6517   else if (BUFFERP (dst_object))
6518     {
6519       code_conversion_save (0, 0);
6520       coding->dst_object = dst_object;
6521       coding->dst_pos = BUF_PT (XBUFFER (dst_object));
6522       coding->dst_pos_byte = BUF_PT_BYTE (XBUFFER (dst_object));
6523       coding->dst_multibyte
6524         = ! NILP (XBUFFER (dst_object)->enable_multibyte_characters);
6525     }
6526   else
6527     {
6528       code_conversion_save (0, 0);
6529       coding->dst_object = Qnil;
6530       coding->dst_multibyte = 1;
6531     }
6532
6533   decode_coding (coding);
6534
6535   if (BUFFERP (coding->dst_object))
6536     set_buffer_internal (XBUFFER (coding->dst_object));
6537
6538   if (! NILP (CODING_ATTR_POST_READ (attrs)))
6539     {
6540       struct gcpro gcpro1, gcpro2;
6541       EMACS_INT prev_Z = Z, prev_Z_BYTE = Z_BYTE;
6542       Lisp_Object val;
6543
6544       TEMP_SET_PT_BOTH (coding->dst_pos, coding->dst_pos_byte);
6545       GCPRO2 (coding->src_object, coding->dst_object);
6546       val = call1 (CODING_ATTR_POST_READ (attrs),
6547                    make_number (coding->produced_char));
6548       UNGCPRO;
6549       CHECK_NATNUM (val);
6550       coding->produced_char += Z - prev_Z;
6551       coding->produced += Z_BYTE - prev_Z_BYTE;
6552     }
6553
6554   if (EQ (dst_object, Qt))
6555     {
6556       coding->dst_object = Fbuffer_string ();
6557     }
6558   else if (NILP (dst_object) && BUFFERP (coding->dst_object))
6559     {
6560       set_buffer_internal (XBUFFER (coding->dst_object));
6561       if (dst_bytes < coding->produced)
6562         {
6563           destination
6564             = (unsigned char *) xrealloc (destination, coding->produced);
6565           if (! destination)
6566             {
6567               record_conversion_result (coding,
6568                                         CODING_RESULT_INSUFFICIENT_DST);
6569               unbind_to (count, Qnil);
6570               return;
6571             }
6572           if (BEGV < GPT && GPT < BEGV + coding->produced_char)
6573             move_gap_both (BEGV, BEGV_BYTE);
6574           bcopy (BEGV_ADDR, destination, coding->produced);
6575           coding->destination = destination;
6576         }
6577     }
6578
6579   if (saved_pt >= 0)
6580     {
6581       /* This is the case of:
6582          (BUFFERP (src_object) && EQ (src_object, dst_object))
6583          As we have moved PT while replacing the original buffer
6584          contents, we must recover it now.  */
6585       set_buffer_internal (XBUFFER (src_object));
6586       if (saved_pt < from)
6587         TEMP_SET_PT_BOTH (saved_pt, saved_pt_byte);
6588       else if (saved_pt < from + chars)
6589         TEMP_SET_PT_BOTH (from, from_byte);
6590       else if (! NILP (current_buffer->enable_multibyte_characters))
6591         TEMP_SET_PT_BOTH (saved_pt + (coding->produced_char - chars),
6592                           saved_pt_byte + (coding->produced - bytes));
6593       else
6594         TEMP_SET_PT_BOTH (saved_pt + (coding->produced - bytes),
6595                           saved_pt_byte + (coding->produced - bytes));
6596     }
6597
6598   unbind_to (count, coding->dst_object);
6599 }
6600
6601
6602 void
6603 encode_coding_object (coding, src_object, from, from_byte, to, to_byte,
6604                       dst_object)
6605      struct coding_system *coding;
6606      Lisp_Object src_object;
6607      EMACS_INT from, from_byte, to, to_byte;
6608      Lisp_Object dst_object;
6609 {
6610   int count = specpdl_ptr - specpdl;
6611   EMACS_INT chars = to - from;
6612   EMACS_INT bytes = to_byte - from_byte;
6613   Lisp_Object attrs;
6614   Lisp_Object buffer;
6615   int saved_pt = -1, saved_pt_byte;
6616
6617   buffer = Fcurrent_buffer ();
6618
6619   coding->src_object = src_object;
6620   coding->src_chars = chars;
6621   coding->src_bytes = bytes;
6622   coding->src_multibyte = chars < bytes;
6623
6624   attrs = CODING_ID_ATTRS (coding->id);
6625
6626   if (! NILP (CODING_ATTR_PRE_WRITE (attrs)))
6627     {
6628       coding->src_object = code_conversion_save (1, coding->src_multibyte);
6629       set_buffer_internal (XBUFFER (coding->src_object));
6630       if (STRINGP (src_object))
6631         insert_from_string (src_object, from, from_byte, chars, bytes, 0);
6632       else if (BUFFERP (src_object))
6633         insert_from_buffer (XBUFFER (src_object), from, chars, 0);
6634       else
6635         insert_1_both (coding->source + from, chars, bytes, 0, 0, 0);
6636
6637       if (EQ (src_object, dst_object))
6638         {
6639           set_buffer_internal (XBUFFER (src_object));
6640           saved_pt = PT, saved_pt_byte = PT_BYTE;
6641           del_range_both (from, from_byte, to, to_byte, 1);
6642           set_buffer_internal (XBUFFER (coding->src_object));
6643         }
6644
6645       call2 (CODING_ATTR_PRE_WRITE (attrs),
6646              make_number (BEG), make_number (Z));
6647       coding->src_object = Fcurrent_buffer ();
6648       if (BEG != GPT)
6649         move_gap_both (BEG, BEG_BYTE);
6650       coding->src_chars = Z - BEG;
6651       coding->src_bytes = Z_BYTE - BEG_BYTE;
6652       coding->src_pos = BEG;
6653       coding->src_pos_byte = BEG_BYTE;
6654       coding->src_multibyte = Z < Z_BYTE;
6655     }
6656   else if (STRINGP (src_object))
6657     {
6658       code_conversion_save (0, 0);
6659       coding->src_pos = from;
6660       coding->src_pos_byte = from_byte;
6661     }
6662   else if (BUFFERP (src_object))
6663     {
6664       code_conversion_save (0, 0);
6665       set_buffer_internal (XBUFFER (src_object));
6666       if (EQ (src_object, dst_object))
6667         {
6668           saved_pt = PT, saved_pt_byte = PT_BYTE;
6669           coding->src_object = del_range_1 (from, to, 1, 1);
6670           coding->src_pos = 0;
6671           coding->src_pos_byte = 0;
6672         }
6673       else
6674         {
6675           if (from < GPT && to >= GPT)
6676             move_gap_both (from, from_byte);
6677           coding->src_pos = from;
6678           coding->src_pos_byte = from_byte;
6679         }
6680     }
6681   else
6682     code_conversion_save (0, 0);
6683
6684   if (BUFFERP (dst_object))
6685     {
6686       coding->dst_object = dst_object;
6687       if (EQ (src_object, dst_object))
6688         {
6689           coding->dst_pos = from;
6690           coding->dst_pos_byte = from_byte;
6691         }
6692       else
6693         {
6694           coding->dst_pos = BUF_PT (XBUFFER (dst_object));
6695           coding->dst_pos_byte = BUF_PT_BYTE (XBUFFER (dst_object));
6696         }
6697       coding->dst_multibyte
6698         = ! NILP (XBUFFER (dst_object)->enable_multibyte_characters);
6699     }
6700   else if (EQ (dst_object, Qt))
6701     {
6702       coding->dst_object = Qnil;
6703       coding->dst_bytes = coding->src_chars;
6704       if (coding->dst_bytes == 0)
6705         coding->dst_bytes = 1;
6706       coding->destination = (unsigned char *) xmalloc (coding->dst_bytes);
6707       coding->dst_multibyte = 0;
6708     }
6709   else
6710     {
6711       coding->dst_object = Qnil;
6712       coding->dst_multibyte = 0;
6713     }
6714
6715   encode_coding (coding);
6716
6717   if (EQ (dst_object, Qt))
6718     {
6719       if (BUFFERP (coding->dst_object))
6720         coding->dst_object = Fbuffer_string ();
6721       else
6722         {
6723           coding->dst_object
6724             = make_unibyte_string ((char *) coding->destination,
6725                                    coding->produced);
6726           xfree (coding->destination);
6727         }
6728     }
6729
6730   if (saved_pt >= 0)
6731     {
6732       /* This is the case of:
6733          (BUFFERP (src_object) && EQ (src_object, dst_object))
6734          As we have moved PT while replacing the original buffer
6735          contents, we must recover it now.  */
6736       set_buffer_internal (XBUFFER (src_object));
6737       if (saved_pt < from)
6738         TEMP_SET_PT_BOTH (saved_pt, saved_pt_byte);
6739       else if (saved_pt < from + chars)
6740         TEMP_SET_PT_BOTH (from, from_byte);
6741       else if (! NILP (current_buffer->enable_multibyte_characters))
6742         TEMP_SET_PT_BOTH (saved_pt + (coding->produced_char - chars),
6743                           saved_pt_byte + (coding->produced - bytes));
6744       else
6745         TEMP_SET_PT_BOTH (saved_pt + (coding->produced - bytes),
6746                           saved_pt_byte + (coding->produced - bytes));
6747     }
6748
6749   unbind_to (count, Qnil);
6750 }
6751
6752
6753 Lisp_Object
6754 preferred_coding_system ()
6755 {
6756   int id = coding_categories[coding_priorities[0]].id;
6757
6758   return CODING_ID_NAME (id);
6759 }
6760
6761 \f
6762 #ifdef emacs
6763 /*** 8. Emacs Lisp library functions ***/
6764
6765 DEFUN ("coding-system-p", Fcoding_system_p, Scoding_system_p, 1, 1, 0,
6766        doc: /* Return t if OBJECT is nil or a coding-system.
6767 See the documentation of `define-coding-system' for information
6768 about coding-system objects.  */)
6769      (obj)
6770      Lisp_Object obj;
6771 {
6772   return ((NILP (obj) || CODING_SYSTEM_P (obj)) ? Qt : Qnil);
6773 }
6774
6775 DEFUN ("read-non-nil-coding-system", Fread_non_nil_coding_system,
6776        Sread_non_nil_coding_system, 1, 1, 0,
6777        doc: /* Read a coding system from the minibuffer, prompting with string PROMPT.  */)
6778      (prompt)
6779      Lisp_Object prompt;
6780 {
6781   Lisp_Object val;
6782   do
6783     {
6784       val = Fcompleting_read (prompt, Vcoding_system_alist, Qnil,
6785                               Qt, Qnil, Qcoding_system_history, Qnil, Qnil);
6786     }
6787   while (SCHARS (val) == 0);
6788   return (Fintern (val, Qnil));
6789 }
6790
6791 DEFUN ("read-coding-system", Fread_coding_system, Sread_coding_system, 1, 2, 0,
6792        doc: /* Read a coding system from the minibuffer, prompting with string PROMPT.
6793 If the user enters null input, return second argument DEFAULT-CODING-SYSTEM.  */)
6794      (prompt, default_coding_system)
6795      Lisp_Object prompt, default_coding_system;
6796 {
6797   Lisp_Object val;
6798   if (SYMBOLP (default_coding_system))
6799     XSETSTRING (default_coding_system, XPNTR (SYMBOL_NAME (default_coding_system)));
6800   val = Fcompleting_read (prompt, Vcoding_system_alist, Qnil,
6801                           Qt, Qnil, Qcoding_system_history,
6802                           default_coding_system, Qnil);
6803   return (SCHARS (val) == 0 ? Qnil : Fintern (val, Qnil));
6804 }
6805
6806 DEFUN ("check-coding-system", Fcheck_coding_system, Scheck_coding_system,
6807        1, 1, 0,
6808        doc: /* Check validity of CODING-SYSTEM.
6809 If valid, return CODING-SYSTEM, else signal a `coding-system-error' error.  */)
6810   (coding_system)
6811      Lisp_Object coding_system;
6812 {
6813   CHECK_SYMBOL (coding_system);
6814   if (!NILP (Fcoding_system_p (coding_system)))
6815     return coding_system;
6816   while (1)
6817     Fsignal (Qcoding_system_error, Fcons (coding_system, Qnil));
6818 }
6819
6820 \f
6821 /* Detect how the bytes at SRC of length SRC_BYTES are encoded.  If
6822    HIGHEST is nonzero, return the coding system of the highest
6823    priority among the detected coding systems.  Otherwize return a
6824    list of detected coding systems sorted by their priorities.  If
6825    MULTIBYTEP is nonzero, it is assumed that the bytes are in correct
6826    multibyte form but contains only ASCII and eight-bit chars.
6827    Otherwise, the bytes are raw bytes.
6828
6829    CODING-SYSTEM controls the detection as below:
6830
6831    If it is nil, detect both text-format and eol-format.  If the
6832    text-format part of CODING-SYSTEM is already specified
6833    (e.g. `iso-latin-1'), detect only eol-format.  If the eol-format
6834    part of CODING-SYSTEM is already specified (e.g. `undecided-unix'),
6835    detect only text-format.  */
6836
6837 Lisp_Object
6838 detect_coding_system (src, src_chars, src_bytes, highest, multibytep,
6839                       coding_system)
6840      const unsigned char *src;
6841      int src_chars, src_bytes, highest;
6842      int multibytep;
6843      Lisp_Object coding_system;
6844 {
6845   const unsigned char *src_end = src + src_bytes;
6846   Lisp_Object attrs, eol_type;
6847   Lisp_Object val;
6848   struct coding_system coding;
6849   int id;
6850   struct coding_detection_info detect_info;
6851   enum coding_category base_category;
6852
6853   if (NILP (coding_system))
6854     coding_system = Qundecided;
6855   setup_coding_system (coding_system, &coding);
6856   attrs = CODING_ID_ATTRS (coding.id);
6857   eol_type = CODING_ID_EOL_TYPE (coding.id);
6858   coding_system = CODING_ATTR_BASE_NAME (attrs);
6859
6860   coding.source = src;
6861   coding.src_chars = src_chars;
6862   coding.src_bytes = src_bytes;
6863   coding.src_multibyte = multibytep;
6864   coding.consumed = 0;
6865   coding.mode |= CODING_MODE_LAST_BLOCK;
6866
6867   detect_info.checked = detect_info.found = detect_info.rejected = 0;
6868
6869   /* At first, detect text-format if necessary.  */
6870   base_category = XINT (CODING_ATTR_CATEGORY (attrs));
6871   if (base_category == coding_category_undecided)
6872     {
6873       enum coding_category category;
6874       struct coding_system *this;
6875       int c, i;
6876
6877       /* Skip all ASCII bytes except for a few ISO2022 controls.  */
6878       for (i = 0; src < src_end; i++, src++)
6879         {
6880           c = *src;
6881           if (c & 0x80 || (c < 0x20 && (c == 0
6882                                         || c == ISO_CODE_ESC
6883                                         || c == ISO_CODE_SI
6884                                         || c == ISO_CODE_SO)))
6885             break;
6886         }
6887       coding.head_ascii = src - coding.source;
6888
6889       if (src < src_end)
6890         for (i = 0; i < coding_category_raw_text; i++)
6891           {
6892             category = coding_priorities[i];
6893             this = coding_categories + category;
6894
6895             if (this->id < 0)
6896               {
6897                 /* No coding system of this category is defined.  */
6898                 detect_info.rejected |= (1 << category);
6899               }
6900             else if (category >= coding_category_raw_text)
6901               continue;
6902             else if (detect_info.checked & (1 << category))
6903               {
6904                 if (highest
6905                     && (detect_info.found & (1 << category)))
6906                   break;
6907               }
6908             else
6909               {
6910                 if ((*(this->detector)) (&coding, &detect_info)
6911                     && highest
6912                     && (detect_info.found & (1 << category)))
6913                   {
6914                     if (category == coding_category_utf_16_auto)
6915                       {
6916                         if (detect_info.found & CATEGORY_MASK_UTF_16_LE)
6917                           category = coding_category_utf_16_le;
6918                         else
6919                           category = coding_category_utf_16_be;
6920                       }
6921                     break;
6922                   }
6923               }
6924           }
6925
6926       if (detect_info.rejected == CATEGORY_MASK_ANY)
6927         {
6928           detect_info.found = CATEGORY_MASK_RAW_TEXT;
6929           id = coding_categories[coding_category_raw_text].id;
6930           val = Fcons (make_number (id), Qnil);
6931         }
6932       else if (! detect_info.rejected && ! detect_info.found)
6933         {
6934           detect_info.found = CATEGORY_MASK_ANY;
6935           id = coding_categories[coding_category_undecided].id;
6936           val = Fcons (make_number (id), Qnil);
6937         }
6938       else if (highest)
6939         {
6940           if (detect_info.found)
6941             {
6942               detect_info.found = 1 << category;
6943               val = Fcons (make_number (this->id), Qnil);
6944             }
6945           else
6946             for (i = 0; i < coding_category_raw_text; i++)
6947               if (! (detect_info.rejected & (1 << coding_priorities[i])))
6948                 {
6949                   detect_info.found = 1 << coding_priorities[i];
6950                   id = coding_categories[coding_priorities[i]].id;
6951                   val = Fcons (make_number (id), Qnil);
6952                   break;
6953                 }
6954         }
6955       else
6956         {
6957           int mask = detect_info.rejected | detect_info.found;
6958           int found = 0;
6959           val = Qnil;
6960
6961           for (i = coding_category_raw_text - 1; i >= 0; i--)
6962             {
6963               category = coding_priorities[i];
6964               if (! (mask & (1 << category)))
6965                 {
6966                   found |= 1 << category;
6967                   id = coding_categories[category].id;
6968                   val = Fcons (make_number (id), val);
6969                 }
6970             }
6971           for (i = coding_category_raw_text - 1; i >= 0; i--)
6972             {
6973               category = coding_priorities[i];
6974               if (detect_info.found & (1 << category))
6975                 {
6976                   id = coding_categories[category].id;
6977                   val = Fcons (make_number (id), val);
6978                 }
6979             }
6980           detect_info.found |= found;
6981         }
6982     }
6983   else if (base_category == coding_category_utf_16_auto)
6984     {
6985       if (detect_coding_utf_16 (&coding, &detect_info))
6986         {
6987           enum coding_category category;
6988           struct coding_system *this;
6989
6990           if (detect_info.found & CATEGORY_MASK_UTF_16_LE)
6991             this = coding_categories + coding_category_utf_16_le;
6992           else if (detect_info.found & CATEGORY_MASK_UTF_16_BE)
6993             this = coding_categories + coding_category_utf_16_be;
6994           else if (detect_info.rejected & CATEGORY_MASK_UTF_16_LE_NOSIG)
6995             this = coding_categories + coding_category_utf_16_be_nosig;
6996           else
6997             this = coding_categories + coding_category_utf_16_le_nosig;
6998           val = Fcons (make_number (this->id), Qnil);
6999         }
7000     }
7001   else
7002     {
7003       detect_info.found = 1 << XINT (CODING_ATTR_CATEGORY (attrs));
7004       val = Fcons (make_number (coding.id), Qnil);
7005     }
7006
7007   /* Then, detect eol-format if necessary.  */
7008   {
7009     int normal_eol = -1, utf_16_be_eol = -1, utf_16_le_eol;
7010     Lisp_Object tail;
7011
7012     if (VECTORP (eol_type))
7013       {
7014         if (detect_info.found & ~CATEGORY_MASK_UTF_16)
7015           normal_eol = detect_eol (coding.source, src_bytes,
7016                                    coding_category_raw_text);
7017         if (detect_info.found & (CATEGORY_MASK_UTF_16_BE
7018                                  | CATEGORY_MASK_UTF_16_BE_NOSIG))
7019           utf_16_be_eol = detect_eol (coding.source, src_bytes,
7020                                       coding_category_utf_16_be);
7021         if (detect_info.found & (CATEGORY_MASK_UTF_16_LE
7022                                  | CATEGORY_MASK_UTF_16_LE_NOSIG))
7023           utf_16_le_eol = detect_eol (coding.source, src_bytes,
7024                                       coding_category_utf_16_le);
7025       }
7026     else
7027       {
7028         if (EQ (eol_type, Qunix))
7029           normal_eol = utf_16_be_eol = utf_16_le_eol = EOL_SEEN_LF;
7030         else if (EQ (eol_type, Qdos))
7031           normal_eol = utf_16_be_eol = utf_16_le_eol = EOL_SEEN_CRLF;
7032         else
7033           normal_eol = utf_16_be_eol = utf_16_le_eol = EOL_SEEN_CR;
7034       }
7035
7036     for (tail = val; CONSP (tail); tail = XCDR (tail))
7037       {
7038         enum coding_category category;
7039         int this_eol;
7040
7041         id = XINT (XCAR (tail));
7042         attrs = CODING_ID_ATTRS (id);
7043         category = XINT (CODING_ATTR_CATEGORY (attrs));
7044         eol_type = CODING_ID_EOL_TYPE (id);
7045         if (VECTORP (eol_type))
7046           {
7047             if (category == coding_category_utf_16_be
7048                 || category == coding_category_utf_16_be_nosig)
7049               this_eol = utf_16_be_eol;
7050             else if (category == coding_category_utf_16_le
7051                      || category == coding_category_utf_16_le_nosig)
7052               this_eol = utf_16_le_eol;
7053             else
7054               this_eol = normal_eol;
7055
7056             if (this_eol == EOL_SEEN_LF)
7057               XSETCAR (tail, AREF (eol_type, 0));
7058             else if (this_eol == EOL_SEEN_CRLF)
7059               XSETCAR (tail, AREF (eol_type, 1));
7060             else if (this_eol == EOL_SEEN_CR)
7061               XSETCAR (tail, AREF (eol_type, 2));
7062             else
7063               XSETCAR (tail, CODING_ID_NAME (id));
7064           }
7065         else
7066           XSETCAR (tail, CODING_ID_NAME (id));
7067       }
7068   }
7069
7070   return (highest ? XCAR (val) : val);
7071 }
7072
7073
7074 DEFUN ("detect-coding-region", Fdetect_coding_region, Sdetect_coding_region,
7075        2, 3, 0,
7076        doc: /* Detect coding system of the text in the region between START and END.
7077 Return a list of possible coding systems ordered by priority.
7078
7079 If only ASCII characters are found, it returns a list of single element
7080 `undecided' or its subsidiary coding system according to a detected
7081 end-of-line format.
7082
7083 If optional argument HIGHEST is non-nil, return the coding system of
7084 highest priority.  */)
7085      (start, end, highest)
7086      Lisp_Object start, end, highest;
7087 {
7088   int from, to;
7089   int from_byte, to_byte;
7090
7091   CHECK_NUMBER_COERCE_MARKER (start);
7092   CHECK_NUMBER_COERCE_MARKER (end);
7093
7094   validate_region (&start, &end);
7095   from = XINT (start), to = XINT (end);
7096   from_byte = CHAR_TO_BYTE (from);
7097   to_byte = CHAR_TO_BYTE (to);
7098
7099   if (from < GPT && to >= GPT)
7100     move_gap_both (to, to_byte);
7101
7102   return detect_coding_system (BYTE_POS_ADDR (from_byte),
7103                                to - from, to_byte - from_byte,
7104                                !NILP (highest),
7105                                !NILP (current_buffer
7106                                       ->enable_multibyte_characters),
7107                                Qnil);
7108 }
7109
7110 DEFUN ("detect-coding-string", Fdetect_coding_string, Sdetect_coding_string,
7111        1, 2, 0,
7112        doc: /* Detect coding system of the text in STRING.
7113 Return a list of possible coding systems ordered by priority.
7114
7115 If only ASCII characters are found, it returns a list of single element
7116 `undecided' or its subsidiary coding system according to a detected
7117 end-of-line format.
7118
7119 If optional argument HIGHEST is non-nil, return the coding system of
7120 highest priority.  */)
7121      (string, highest)
7122      Lisp_Object string, highest;
7123 {
7124   CHECK_STRING (string);
7125
7126   return detect_coding_system (SDATA (string),
7127                                SCHARS (string), SBYTES (string),
7128                                !NILP (highest), STRING_MULTIBYTE (string),
7129                                Qnil);
7130 }
7131
7132
7133 static INLINE int
7134 char_encodable_p (c, attrs)
7135      int c;
7136      Lisp_Object attrs;
7137 {
7138   Lisp_Object tail;
7139   struct charset *charset;
7140   Lisp_Object translation_table;
7141
7142   translation_table = CODING_ATTR_TRANS_TBL (attrs);
7143   if (! NILP (translation_table))
7144     c = translate_char (translation_table, c);
7145   for (tail = CODING_ATTR_CHARSET_LIST (attrs);
7146        CONSP (tail); tail = XCDR (tail))
7147     {
7148       charset = CHARSET_FROM_ID (XINT (XCAR (tail)));
7149       if (CHAR_CHARSET_P (c, charset))
7150         break;
7151     }
7152   return (! NILP (tail));
7153 }
7154
7155
7156 /* Return a list of coding systems that safely encode the text between
7157    START and END.  If EXCLUDE is non-nil, it is a list of coding
7158    systems not to check.  The returned list doesn't contain any such
7159    coding systems.  In any case, if the text contains only ASCII or is
7160    unibyte, return t.  */
7161
7162 DEFUN ("find-coding-systems-region-internal",
7163        Ffind_coding_systems_region_internal,
7164        Sfind_coding_systems_region_internal, 2, 3, 0,
7165        doc: /* Internal use only.  */)
7166      (start, end, exclude)
7167      Lisp_Object start, end, exclude;
7168 {
7169   Lisp_Object coding_attrs_list, safe_codings;
7170   EMACS_INT start_byte, end_byte;
7171   const unsigned char *p, *pbeg, *pend;
7172   int c;
7173   Lisp_Object tail, elt;
7174
7175   if (STRINGP (start))
7176     {
7177       if (!STRING_MULTIBYTE (start)
7178           || SCHARS (start) == SBYTES (start))
7179         return Qt;
7180       start_byte = 0;
7181       end_byte = SBYTES (start);
7182     }
7183   else
7184     {
7185       CHECK_NUMBER_COERCE_MARKER (start);
7186       CHECK_NUMBER_COERCE_MARKER (end);
7187       if (XINT (start) < BEG || XINT (end) > Z || XINT (start) > XINT (end))
7188         args_out_of_range (start, end);
7189       if (NILP (current_buffer->enable_multibyte_characters))
7190         return Qt;
7191       start_byte = CHAR_TO_BYTE (XINT (start));
7192       end_byte = CHAR_TO_BYTE (XINT (end));
7193       if (XINT (end) - XINT (start) == end_byte - start_byte)
7194         return Qt;
7195
7196       if (XINT (start) < GPT && XINT (end) > GPT)
7197         {
7198           if ((GPT - XINT (start)) < (XINT (end) - GPT))
7199             move_gap_both (XINT (start), start_byte);
7200           else
7201             move_gap_both (XINT (end), end_byte);
7202         }
7203     }
7204
7205   coding_attrs_list = Qnil;
7206   for (tail = Vcoding_system_list; CONSP (tail); tail = XCDR (tail))
7207     if (NILP (exclude)
7208         || NILP (Fmemq (XCAR (tail), exclude)))
7209       {
7210         Lisp_Object attrs;
7211
7212         attrs = AREF (CODING_SYSTEM_SPEC (XCAR (tail)), 0);
7213         if (EQ (XCAR (tail), CODING_ATTR_BASE_NAME (attrs))
7214             && ! EQ (CODING_ATTR_TYPE (attrs), Qundecided))
7215           {
7216             ASET (attrs, coding_attr_trans_tbl,
7217                   get_translation_table (attrs, 1));
7218             coding_attrs_list = Fcons (attrs, coding_attrs_list);
7219           }
7220       }
7221
7222   if (STRINGP (start))
7223     p = pbeg = SDATA (start);
7224   else
7225     p = pbeg = BYTE_POS_ADDR (start_byte);
7226   pend = p + (end_byte - start_byte);
7227
7228   while (p < pend && ASCII_BYTE_P (*p)) p++;
7229   while (p < pend && ASCII_BYTE_P (*(pend - 1))) pend--;
7230
7231   while (p < pend)
7232     {
7233       if (ASCII_BYTE_P (*p))
7234         p++;
7235       else
7236         {
7237           c = STRING_CHAR_ADVANCE (p);
7238
7239           charset_map_loaded = 0;
7240           for (tail = coding_attrs_list; CONSP (tail);)
7241             {
7242               elt = XCAR (tail);
7243               if (NILP (elt))
7244                 tail = XCDR (tail);
7245               else if (char_encodable_p (c, elt))
7246                 tail = XCDR (tail);
7247               else if (CONSP (XCDR (tail)))
7248                 {
7249                   XSETCAR (tail, XCAR (XCDR (tail)));
7250                   XSETCDR (tail, XCDR (XCDR (tail)));
7251                 }
7252               else
7253                 {
7254                   XSETCAR (tail, Qnil);
7255                   tail = XCDR (tail);
7256                 }
7257             }
7258           if (charset_map_loaded)
7259             {
7260               EMACS_INT p_offset = p - pbeg, pend_offset = pend - pbeg;
7261
7262               if (STRINGP (start))
7263                 pbeg = SDATA (start);
7264               else
7265                 pbeg = BYTE_POS_ADDR (start_byte);
7266               p = pbeg + p_offset;
7267               pend = pbeg + pend_offset;
7268             }
7269         }
7270     }
7271
7272   safe_codings = Qnil;
7273   for (tail = coding_attrs_list; CONSP (tail); tail = XCDR (tail))
7274     if (! NILP (XCAR (tail)))
7275       safe_codings = Fcons (CODING_ATTR_BASE_NAME (XCAR (tail)), safe_codings);
7276
7277   return safe_codings;
7278 }
7279
7280
7281 DEFUN ("unencodable-char-position", Funencodable_char_position,
7282        Sunencodable_char_position, 3, 5, 0,
7283        doc: /*
7284 Return position of first un-encodable character in a region.
7285 START and END specfiy the region and CODING-SYSTEM specifies the
7286 encoding to check.  Return nil if CODING-SYSTEM does encode the region.
7287
7288 If optional 4th argument COUNT is non-nil, it specifies at most how
7289 many un-encodable characters to search.  In this case, the value is a
7290 list of positions.
7291
7292 If optional 5th argument STRING is non-nil, it is a string to search
7293 for un-encodable characters.  In that case, START and END are indexes
7294 to the string.  */)
7295      (start, end, coding_system, count, string)
7296      Lisp_Object start, end, coding_system, count, string;
7297 {
7298   int n;
7299   struct coding_system coding;
7300   Lisp_Object attrs, charset_list, translation_table;
7301   Lisp_Object positions;
7302   int from, to;
7303   const unsigned char *p, *stop, *pend;
7304   int ascii_compatible;
7305
7306   setup_coding_system (Fcheck_coding_system (coding_system), &coding);
7307   attrs = CODING_ID_ATTRS (coding.id);
7308   if (EQ (CODING_ATTR_TYPE (attrs), Qraw_text))
7309     return Qnil;
7310   ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
7311   charset_list = CODING_ATTR_CHARSET_LIST (attrs);
7312   translation_table = get_translation_table (attrs, 1);
7313
7314   if (NILP (string))
7315     {
7316       validate_region (&start, &end);
7317       from = XINT (start);
7318       to = XINT (end);
7319       if (NILP (current_buffer->enable_multibyte_characters)
7320           || (ascii_compatible
7321               && (to - from) == (CHAR_TO_BYTE (to) - (CHAR_TO_BYTE (from)))))
7322         return Qnil;
7323       p = CHAR_POS_ADDR (from);
7324       pend = CHAR_POS_ADDR (to);
7325       if (from < GPT && to >= GPT)
7326         stop = GPT_ADDR;
7327       else
7328         stop = pend;
7329     }
7330   else
7331     {
7332       CHECK_STRING (string);
7333       CHECK_NATNUM (start);
7334       CHECK_NATNUM (end);
7335       from = XINT (start);
7336       to = XINT (end);
7337       if (from > to
7338           || to > SCHARS (string))
7339         args_out_of_range_3 (string, start, end);
7340       if (! STRING_MULTIBYTE (string))
7341         return Qnil;
7342       p = SDATA (string) + string_char_to_byte (string, from);
7343       stop = pend = SDATA (string) + string_char_to_byte (string, to);
7344       if (ascii_compatible && (to - from) == (pend - p))
7345         return Qnil;
7346     }
7347
7348   if (NILP (count))
7349     n = 1;
7350   else
7351     {
7352       CHECK_NATNUM (count);
7353       n = XINT (count);
7354     }
7355
7356   positions = Qnil;
7357   while (1)
7358     {
7359       int c;
7360
7361       if (ascii_compatible)
7362         while (p < stop && ASCII_BYTE_P (*p))
7363           p++, from++;
7364       if (p >= stop)
7365         {
7366           if (p >= pend)
7367             break;
7368           stop = pend;
7369           p = GAP_END_ADDR;
7370         }
7371
7372       c = STRING_CHAR_ADVANCE (p);
7373       if (! (ASCII_CHAR_P (c) && ascii_compatible)
7374           && ! char_charset (translate_char (translation_table, c),
7375                              charset_list, NULL))
7376         {
7377           positions = Fcons (make_number (from), positions);
7378           n--;
7379           if (n == 0)
7380             break;
7381         }
7382
7383       from++;
7384     }
7385
7386   return (NILP (count) ? Fcar (positions) : Fnreverse (positions));
7387 }
7388
7389
7390 DEFUN ("check-coding-systems-region", Fcheck_coding_systems_region,
7391        Scheck_coding_systems_region, 3, 3, 0,
7392        doc: /* Check if the region is encodable by coding systems.
7393
7394 START and END are buffer positions specifying the region.
7395 CODING-SYSTEM-LIST is a list of coding systems to check.
7396
7397 The value is an alist ((CODING-SYSTEM POS0 POS1 ...) ...), where
7398 CODING-SYSTEM is a member of CODING-SYSTEM-LIst and can't encode the
7399 whole region, POS0, POS1, ... are buffer positions where non-encodable
7400 characters are found.
7401
7402 If all coding systems in CODING-SYSTEM-LIST can encode the region, the
7403 value is nil.
7404
7405 START may be a string.  In that case, check if the string is
7406 encodable, and the value contains indices to the string instead of
7407 buffer positions.  END is ignored.  */)
7408      (start, end, coding_system_list)
7409      Lisp_Object start, end, coding_system_list;
7410 {
7411   Lisp_Object list;
7412   EMACS_INT start_byte, end_byte;
7413   int pos;
7414   const unsigned char *p, *pbeg, *pend;
7415   int c;
7416   Lisp_Object tail, elt, attrs;
7417
7418   if (STRINGP (start))
7419     {
7420       if (!STRING_MULTIBYTE (start)
7421           && SCHARS (start) != SBYTES (start))
7422         return Qnil;
7423       start_byte = 0;
7424       end_byte = SBYTES (start);
7425       pos = 0;
7426     }
7427   else
7428     {
7429       CHECK_NUMBER_COERCE_MARKER (start);
7430       CHECK_NUMBER_COERCE_MARKER (end);
7431       if (XINT (start) < BEG || XINT (end) > Z || XINT (start) > XINT (end))
7432         args_out_of_range (start, end);
7433       if (NILP (current_buffer->enable_multibyte_characters))
7434         return Qnil;
7435       start_byte = CHAR_TO_BYTE (XINT (start));
7436       end_byte = CHAR_TO_BYTE (XINT (end));
7437       if (XINT (end) - XINT (start) == end_byte - start_byte)
7438         return Qt;
7439
7440       if (XINT (start) < GPT && XINT (end) > GPT)
7441         {
7442           if ((GPT - XINT (start)) < (XINT (end) - GPT))
7443             move_gap_both (XINT (start), start_byte);
7444           else
7445             move_gap_both (XINT (end), end_byte);
7446         }
7447       pos = XINT (start);
7448     }
7449
7450   list = Qnil;
7451   for (tail = coding_system_list; CONSP (tail); tail = XCDR (tail))
7452     {
7453       elt = XCAR (tail);
7454       attrs = AREF (CODING_SYSTEM_SPEC (elt), 0);
7455       ASET (attrs, coding_attr_trans_tbl, get_translation_table (attrs, 1));
7456       list = Fcons (Fcons (elt, Fcons (attrs, Qnil)), list);
7457     }
7458
7459   if (STRINGP (start))
7460     p = pbeg = SDATA (start);
7461   else
7462     p = pbeg = BYTE_POS_ADDR (start_byte);
7463   pend = p + (end_byte - start_byte);
7464
7465   while (p < pend && ASCII_BYTE_P (*p)) p++, pos++;
7466   while (p < pend && ASCII_BYTE_P (*(pend - 1))) pend--;
7467
7468   while (p < pend)
7469     {
7470       if (ASCII_BYTE_P (*p))
7471         p++;
7472       else
7473         {
7474           c = STRING_CHAR_ADVANCE (p);
7475
7476           charset_map_loaded = 0;
7477           for (tail = list; CONSP (tail); tail = XCDR (tail))
7478             {
7479               elt = XCDR (XCAR (tail));
7480               if (! char_encodable_p (c, XCAR (elt)))
7481                 XSETCDR (elt, Fcons (make_number (pos), XCDR (elt)));
7482             }
7483           if (charset_map_loaded)
7484             {
7485               EMACS_INT p_offset = p - pbeg, pend_offset = pend - pbeg;
7486
7487               if (STRINGP (start))
7488                 pbeg = SDATA (start);
7489               else
7490                 pbeg = BYTE_POS_ADDR (start_byte);
7491               p = pbeg + p_offset;
7492               pend = pbeg + pend_offset;
7493             }
7494         }
7495       pos++;
7496     }
7497
7498   tail = list;
7499   list = Qnil;
7500   for (; CONSP (tail); tail = XCDR (tail))
7501     {
7502       elt = XCAR (tail);
7503       if (CONSP (XCDR (XCDR (elt))))
7504         list = Fcons (Fcons (XCAR (elt), Fnreverse (XCDR (XCDR (elt)))),
7505                       list);
7506     }
7507
7508   return list;
7509 }
7510
7511
7512 Lisp_Object
7513 code_convert_region (start, end, coding_system, dst_object, encodep, norecord)
7514      Lisp_Object start, end, coding_system, dst_object;
7515      int encodep, norecord;
7516 {
7517   struct coding_system coding;
7518   EMACS_INT from, from_byte, to, to_byte;
7519   Lisp_Object src_object;
7520
7521   CHECK_NUMBER_COERCE_MARKER (start);
7522   CHECK_NUMBER_COERCE_MARKER (end);
7523   if (NILP (coding_system))
7524     coding_system = Qno_conversion;
7525   else
7526     CHECK_CODING_SYSTEM (coding_system);
7527   src_object = Fcurrent_buffer ();
7528   if (NILP (dst_object))
7529     dst_object = src_object;
7530   else if (! EQ (dst_object, Qt))
7531     CHECK_BUFFER (dst_object);
7532
7533   validate_region (&start, &end);
7534   from = XFASTINT (start);
7535   from_byte = CHAR_TO_BYTE (from);
7536   to = XFASTINT (end);
7537   to_byte = CHAR_TO_BYTE (to);
7538
7539   setup_coding_system (coding_system, &coding);
7540   coding.mode |= CODING_MODE_LAST_BLOCK;
7541
7542   if (encodep)
7543     encode_coding_object (&coding, src_object, from, from_byte, to, to_byte,
7544                           dst_object);
7545   else
7546     decode_coding_object (&coding, src_object, from, from_byte, to, to_byte,
7547                           dst_object);
7548   if (! norecord)
7549     Vlast_coding_system_used = CODING_ID_NAME (coding.id);
7550
7551   return (BUFFERP (dst_object)
7552           ? make_number (coding.produced_char)
7553           : coding.dst_object);
7554 }
7555
7556
7557 DEFUN ("decode-coding-region", Fdecode_coding_region, Sdecode_coding_region,
7558        3, 4, "r\nzCoding system: ",
7559        doc: /* Decode the current region from the specified coding system.
7560 When called from a program, takes four arguments:
7561         START, END, CODING-SYSTEM, and DESTINATION.
7562 START and END are buffer positions.
7563
7564 Optional 4th arguments DESTINATION specifies where the decoded text goes.
7565 If nil, the region between START and END is replace by the decoded text.
7566 If buffer, the decoded text is inserted in the buffer.
7567 If t, the decoded text is returned.
7568
7569 This function sets `last-coding-system-used' to the precise coding system
7570 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
7571 not fully specified.)
7572 It returns the length of the decoded text.  */)
7573      (start, end, coding_system, destination)
7574      Lisp_Object start, end, coding_system, destination;
7575 {
7576   return code_convert_region (start, end, coding_system, destination, 0, 0);
7577 }
7578
7579 DEFUN ("encode-coding-region", Fencode_coding_region, Sencode_coding_region,
7580        3, 4, "r\nzCoding system: ",
7581        doc: /* Encode the current region by specified coding system.
7582 When called from a program, takes three arguments:
7583 START, END, and CODING-SYSTEM.  START and END are buffer positions.
7584
7585 Optional 4th arguments DESTINATION specifies where the encoded text goes.
7586 If nil, the region between START and END is replace by the encoded text.
7587 If buffer, the encoded text is inserted in the buffer.
7588 If t, the encoded text is returned.
7589
7590 This function sets `last-coding-system-used' to the precise coding system
7591 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
7592 not fully specified.)
7593 It returns the length of the encoded text.  */)
7594   (start, end, coding_system, destination)
7595      Lisp_Object start, end, coding_system, destination;
7596 {
7597   return code_convert_region (start, end, coding_system, destination, 1, 0);
7598 }
7599
7600 Lisp_Object
7601 code_convert_string (string, coding_system, dst_object,
7602                      encodep, nocopy, norecord)
7603      Lisp_Object string, coding_system, dst_object;
7604      int encodep, nocopy, norecord;
7605 {
7606   struct coding_system coding;
7607   EMACS_INT chars, bytes;
7608
7609   CHECK_STRING (string);
7610   if (NILP (coding_system))
7611     {
7612       if (! norecord)
7613         Vlast_coding_system_used = Qno_conversion;
7614       if (NILP (dst_object))
7615         return (nocopy ? Fcopy_sequence (string) : string);
7616     }
7617
7618   if (NILP (coding_system))
7619     coding_system = Qno_conversion;
7620   else
7621     CHECK_CODING_SYSTEM (coding_system);
7622   if (NILP (dst_object))
7623     dst_object = Qt;
7624   else if (! EQ (dst_object, Qt))
7625     CHECK_BUFFER (dst_object);
7626
7627   setup_coding_system (coding_system, &coding);
7628   coding.mode |= CODING_MODE_LAST_BLOCK;
7629   chars = SCHARS (string);
7630   bytes = SBYTES (string);
7631   if (encodep)
7632     encode_coding_object (&coding, string, 0, 0, chars, bytes, dst_object);
7633   else
7634     decode_coding_object (&coding, string, 0, 0, chars, bytes, dst_object);
7635   if (! norecord)
7636     Vlast_coding_system_used = CODING_ID_NAME (coding.id);
7637
7638   return (BUFFERP (dst_object)
7639           ? make_number (coding.produced_char)
7640           : coding.dst_object);
7641 }
7642
7643
7644 /* Encode or decode STRING according to CODING_SYSTEM.
7645    Do not set Vlast_coding_system_used.
7646
7647    This function is called only from macros DECODE_FILE and
7648    ENCODE_FILE, thus we ignore character composition.  */
7649
7650 Lisp_Object
7651 code_convert_string_norecord (string, coding_system, encodep)
7652      Lisp_Object string, coding_system;
7653      int encodep;
7654 {
7655   return code_convert_string (string, coding_system, Qt, encodep, 0, 1);
7656 }
7657
7658
7659 DEFUN ("decode-coding-string", Fdecode_coding_string, Sdecode_coding_string,
7660        2, 4, 0,
7661        doc: /* Decode STRING which is encoded in CODING-SYSTEM, and return the result.
7662
7663 Optional third arg NOCOPY non-nil means it is OK to return STRING itself
7664 if the decoding operation is trivial.
7665
7666 Optional fourth arg BUFFER non-nil meant that the decoded text is
7667 inserted in BUFFER instead of returned as a string.  In this case,
7668 the return value is BUFFER.
7669
7670 This function sets `last-coding-system-used' to the precise coding system
7671 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
7672 not fully specified.  */)
7673   (string, coding_system, nocopy, buffer)
7674      Lisp_Object string, coding_system, nocopy, buffer;
7675 {
7676   return code_convert_string (string, coding_system, buffer,
7677                               0, ! NILP (nocopy), 0);
7678 }
7679
7680 DEFUN ("encode-coding-string", Fencode_coding_string, Sencode_coding_string,
7681        2, 4, 0,
7682        doc: /* Encode STRING to CODING-SYSTEM, and return the result.
7683
7684 Optional third arg NOCOPY non-nil means it is OK to return STRING
7685 itself if the encoding operation is trivial.
7686
7687 Optional fourth arg BUFFER non-nil meant that the encoded text is
7688 inserted in BUFFER instead of returned as a string.  In this case,
7689 the return value is BUFFER.
7690
7691 This function sets `last-coding-system-used' to the precise coding system
7692 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
7693 not fully specified.)  */)
7694      (string, coding_system, nocopy, buffer)
7695      Lisp_Object string, coding_system, nocopy, buffer;
7696 {
7697   return code_convert_string (string, coding_system, buffer,
7698                               1, ! NILP (nocopy), 1);
7699 }
7700
7701 \f
7702 DEFUN ("decode-sjis-char", Fdecode_sjis_char, Sdecode_sjis_char, 1, 1, 0,
7703        doc: /* Decode a Japanese character which has CODE in shift_jis encoding.
7704 Return the corresponding character.  */)
7705      (code)
7706      Lisp_Object code;
7707 {
7708   Lisp_Object spec, attrs, val;
7709   struct charset *charset_roman, *charset_kanji, *charset_kana, *charset;
7710   int c;
7711
7712   CHECK_NATNUM (code);
7713   c = XFASTINT (code);
7714   CHECK_CODING_SYSTEM_GET_SPEC (Vsjis_coding_system, spec);
7715   attrs = AREF (spec, 0);
7716
7717   if (ASCII_BYTE_P (c)
7718       && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
7719     return code;
7720
7721   val = CODING_ATTR_CHARSET_LIST (attrs);
7722   charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
7723   charset_kana = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
7724   charset_kanji = CHARSET_FROM_ID (XINT (XCAR (val)));
7725
7726   if (c <= 0x7F)
7727     charset = charset_roman;
7728   else if (c >= 0xA0 && c < 0xDF)
7729     {
7730       charset = charset_kana;
7731       c -= 0x80;
7732     }
7733   else
7734     {
7735       int s1 = c >> 8, s2 = c & 0xFF;
7736
7737       if (s1 < 0x81 || (s1 > 0x9F && s1 < 0xE0) || s1 > 0xEF
7738           || s2 < 0x40 || s2 == 0x7F || s2 > 0xFC)
7739         error ("Invalid code: %d", code);
7740       SJIS_TO_JIS (c);
7741       charset = charset_kanji;
7742     }
7743   c = DECODE_CHAR (charset, c);
7744   if (c < 0)
7745     error ("Invalid code: %d", code);
7746   return make_number (c);
7747 }
7748
7749
7750 DEFUN ("encode-sjis-char", Fencode_sjis_char, Sencode_sjis_char, 1, 1, 0,
7751        doc: /* Encode a Japanese character CHAR to shift_jis encoding.
7752 Return the corresponding code in SJIS.  */)
7753      (ch)
7754     Lisp_Object ch;
7755 {
7756   Lisp_Object spec, attrs, charset_list;
7757   int c;
7758   struct charset *charset;
7759   unsigned code;
7760
7761   CHECK_CHARACTER (ch);
7762   c = XFASTINT (ch);
7763   CHECK_CODING_SYSTEM_GET_SPEC (Vsjis_coding_system, spec);
7764   attrs = AREF (spec, 0);
7765
7766   if (ASCII_CHAR_P (c)
7767       && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
7768     return ch;
7769
7770   charset_list = CODING_ATTR_CHARSET_LIST (attrs);
7771   charset = char_charset (c, charset_list, &code);
7772   if (code == CHARSET_INVALID_CODE (charset))
7773     error ("Can't encode by shift_jis encoding: %d", c);
7774   JIS_TO_SJIS (code);
7775
7776   return make_number (code);
7777 }
7778
7779 DEFUN ("decode-big5-char", Fdecode_big5_char, Sdecode_big5_char, 1, 1, 0,
7780        doc: /* Decode a Big5 character which has CODE in BIG5 coding system.
7781 Return the corresponding character.  */)
7782      (code)
7783      Lisp_Object code;
7784 {
7785   Lisp_Object spec, attrs, val;
7786   struct charset *charset_roman, *charset_big5, *charset;
7787   int c;
7788
7789   CHECK_NATNUM (code);
7790   c = XFASTINT (code);
7791   CHECK_CODING_SYSTEM_GET_SPEC (Vbig5_coding_system, spec);
7792   attrs = AREF (spec, 0);
7793
7794   if (ASCII_BYTE_P (c)
7795       && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
7796     return code;
7797
7798   val = CODING_ATTR_CHARSET_LIST (attrs);
7799   charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
7800   charset_big5 = CHARSET_FROM_ID (XINT (XCAR (val)));
7801
7802   if (c <= 0x7F)
7803     charset = charset_roman;
7804   else
7805     {
7806       int b1 = c >> 8, b2 = c & 0x7F;
7807       if (b1 < 0xA1 || b1 > 0xFE
7808           || b2 < 0x40 || (b2 > 0x7E && b2 < 0xA1) || b2 > 0xFE)
7809         error ("Invalid code: %d", code);
7810       charset = charset_big5;
7811     }
7812   c = DECODE_CHAR (charset, (unsigned )c);
7813   if (c < 0)
7814     error ("Invalid code: %d", code);
7815   return make_number (c);
7816 }
7817
7818 DEFUN ("encode-big5-char", Fencode_big5_char, Sencode_big5_char, 1, 1, 0,
7819        doc: /* Encode the Big5 character CHAR to BIG5 coding system.
7820 Return the corresponding character code in Big5.  */)
7821      (ch)
7822      Lisp_Object ch;
7823 {
7824   Lisp_Object spec, attrs, charset_list;
7825   struct charset *charset;
7826   int c;
7827   unsigned code;
7828
7829   CHECK_CHARACTER (ch);
7830   c = XFASTINT (ch);
7831   CHECK_CODING_SYSTEM_GET_SPEC (Vbig5_coding_system, spec);
7832   attrs = AREF (spec, 0);
7833   if (ASCII_CHAR_P (c)
7834       && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
7835     return ch;
7836
7837   charset_list = CODING_ATTR_CHARSET_LIST (attrs);
7838   charset = char_charset (c, charset_list, &code);
7839   if (code == CHARSET_INVALID_CODE (charset))
7840     error ("Can't encode by Big5 encoding: %d", c);
7841
7842   return make_number (code);
7843 }
7844
7845 \f
7846 DEFUN ("set-terminal-coding-system-internal",
7847        Fset_terminal_coding_system_internal,
7848        Sset_terminal_coding_system_internal, 1, 1, 0,
7849        doc: /* Internal use only.  */)
7850      (coding_system)
7851      Lisp_Object coding_system;
7852 {
7853   CHECK_SYMBOL (coding_system);
7854   setup_coding_system (Fcheck_coding_system (coding_system),
7855                         &terminal_coding);
7856
7857   /* We had better not send unsafe characters to terminal.  */
7858   terminal_coding.mode |= CODING_MODE_SAFE_ENCODING;
7859   /* Characer composition should be disabled.  */
7860   terminal_coding.common_flags &= ~CODING_ANNOTATE_COMPOSITION_MASK;
7861   terminal_coding.src_multibyte = 1;
7862   terminal_coding.dst_multibyte = 0;
7863   return Qnil;
7864 }
7865
7866 DEFUN ("set-safe-terminal-coding-system-internal",
7867        Fset_safe_terminal_coding_system_internal,
7868        Sset_safe_terminal_coding_system_internal, 1, 1, 0,
7869        doc: /* Internal use only.  */)
7870      (coding_system)
7871      Lisp_Object coding_system;
7872 {
7873   CHECK_SYMBOL (coding_system);
7874   setup_coding_system (Fcheck_coding_system (coding_system),
7875                        &safe_terminal_coding);
7876   /* Characer composition should be disabled.  */
7877   safe_terminal_coding.common_flags &= ~CODING_ANNOTATE_COMPOSITION_MASK;
7878   safe_terminal_coding.src_multibyte = 1;
7879   safe_terminal_coding.dst_multibyte = 0;
7880   return Qnil;
7881 }
7882
7883 DEFUN ("terminal-coding-system",
7884        Fterminal_coding_system, Sterminal_coding_system, 0, 0, 0,
7885        doc: /* Return coding system specified for terminal output.  */)
7886      ()
7887 {
7888   return CODING_ID_NAME (terminal_coding.id);
7889 }
7890
7891 DEFUN ("set-keyboard-coding-system-internal",
7892        Fset_keyboard_coding_system_internal,
7893        Sset_keyboard_coding_system_internal, 1, 1, 0,
7894        doc: /* Internal use only.  */)
7895      (coding_system)
7896      Lisp_Object coding_system;
7897 {
7898   CHECK_SYMBOL (coding_system);
7899   setup_coding_system (Fcheck_coding_system (coding_system),
7900                        &keyboard_coding);
7901   /* Characer composition should be disabled.  */
7902   keyboard_coding.common_flags &= ~CODING_ANNOTATE_COMPOSITION_MASK;
7903   return Qnil;
7904 }
7905
7906 DEFUN ("keyboard-coding-system",
7907        Fkeyboard_coding_system, Skeyboard_coding_system, 0, 0, 0,
7908        doc: /* Return coding system specified for decoding keyboard input.  */)
7909      ()
7910 {
7911   return CODING_ID_NAME (keyboard_coding.id);
7912 }
7913
7914 \f
7915 DEFUN ("find-operation-coding-system", Ffind_operation_coding_system,
7916        Sfind_operation_coding_system,  1, MANY, 0,
7917        doc: /* Choose a coding system for an operation based on the target name.
7918 The value names a pair of coding systems: (DECODING-SYSTEM . ENCODING-SYSTEM).
7919 DECODING-SYSTEM is the coding system to use for decoding
7920 \(in case OPERATION does decoding), and ENCODING-SYSTEM is the coding system
7921 for encoding (in case OPERATION does encoding).
7922
7923 The first argument OPERATION specifies an I/O primitive:
7924   For file I/O, `insert-file-contents' or `write-region'.
7925   For process I/O, `call-process', `call-process-region', or `start-process'.
7926   For network I/O, `open-network-stream'.
7927
7928 The remaining arguments should be the same arguments that were passed
7929 to the primitive.  Depending on which primitive, one of those arguments
7930 is selected as the TARGET.  For example, if OPERATION does file I/O,
7931 whichever argument specifies the file name is TARGET.
7932
7933 TARGET has a meaning which depends on OPERATION:
7934   For file I/O, TARGET is a file name.
7935   For process I/O, TARGET is a process name.
7936   For network I/O, TARGET is a service name or a port number
7937
7938 This function looks up what specified for TARGET in,
7939 `file-coding-system-alist', `process-coding-system-alist',
7940 or `network-coding-system-alist' depending on OPERATION.
7941 They may specify a coding system, a cons of coding systems,
7942 or a function symbol to call.
7943 In the last case, we call the function with one argument,
7944 which is a list of all the arguments given to this function.
7945
7946 usage: (find-operation-coding-system OPERATION ARGUMENTS ...)  */)
7947      (nargs, args)
7948      int nargs;
7949      Lisp_Object *args;
7950 {
7951   Lisp_Object operation, target_idx, target, val;
7952   register Lisp_Object chain;
7953
7954   if (nargs < 2)
7955     error ("Too few arguments");
7956   operation = args[0];
7957   if (!SYMBOLP (operation)
7958       || !INTEGERP (target_idx = Fget (operation, Qtarget_idx)))
7959     error ("Invalid first arguement");
7960   if (nargs < 1 + XINT (target_idx))
7961     error ("Too few arguments for operation: %s",
7962            SDATA (SYMBOL_NAME (operation)));
7963   target = args[XINT (target_idx) + 1];
7964   if (!(STRINGP (target)
7965         || (EQ (operation, Qopen_network_stream) && INTEGERP (target))))
7966     error ("Invalid %dth argument", XINT (target_idx) + 1);
7967
7968   chain = ((EQ (operation, Qinsert_file_contents)
7969             || EQ (operation, Qwrite_region))
7970            ? Vfile_coding_system_alist
7971            : (EQ (operation, Qopen_network_stream)
7972               ? Vnetwork_coding_system_alist
7973               : Vprocess_coding_system_alist));
7974   if (NILP (chain))
7975     return Qnil;
7976
7977   for (; CONSP (chain); chain = XCDR (chain))
7978     {
7979       Lisp_Object elt;
7980
7981       elt = XCAR (chain);
7982       if (CONSP (elt)
7983           && ((STRINGP (target)
7984                && STRINGP (XCAR (elt))
7985                && fast_string_match (XCAR (elt), target) >= 0)
7986               || (INTEGERP (target) && EQ (target, XCAR (elt)))))
7987         {
7988           val = XCDR (elt);
7989           /* Here, if VAL is both a valid coding system and a valid
7990              function symbol, we return VAL as a coding system.  */
7991           if (CONSP (val))
7992             return val;
7993           if (! SYMBOLP (val))
7994             return Qnil;
7995           if (! NILP (Fcoding_system_p (val)))
7996             return Fcons (val, val);
7997           if (! NILP (Ffboundp (val)))
7998             {
7999               val = call1 (val, Flist (nargs, args));
8000               if (CONSP (val))
8001                 return val;
8002               if (SYMBOLP (val) && ! NILP (Fcoding_system_p (val)))
8003                 return Fcons (val, val);
8004             }
8005           return Qnil;
8006         }
8007     }
8008   return Qnil;
8009 }
8010
8011 DEFUN ("set-coding-system-priority", Fset_coding_system_priority,
8012        Sset_coding_system_priority, 0, MANY, 0,
8013        doc: /* Assign higher priority to the coding systems given as arguments.
8014 If multiple coding systems belongs to the same category,
8015 all but the first one are ignored.
8016
8017 usage: (set-coding-system-priority ...)  */)
8018      (nargs, args)
8019      int nargs;
8020      Lisp_Object *args;
8021 {
8022   int i, j;
8023   int changed[coding_category_max];
8024   enum coding_category priorities[coding_category_max];
8025
8026   bzero (changed, sizeof changed);
8027
8028   for (i = j = 0; i < nargs; i++)
8029     {
8030       enum coding_category category;
8031       Lisp_Object spec, attrs;
8032
8033       CHECK_CODING_SYSTEM_GET_SPEC (args[i], spec);
8034       attrs = AREF (spec, 0);
8035       category = XINT (CODING_ATTR_CATEGORY (attrs));
8036       if (changed[category])
8037         /* Ignore this coding system because a coding system of the
8038            same category already had a higher priority.  */
8039         continue;
8040       changed[category] = 1;
8041       priorities[j++] = category;
8042       if (coding_categories[category].id >= 0
8043           && ! EQ (args[i], CODING_ID_NAME (coding_categories[category].id)))
8044         setup_coding_system (args[i], &coding_categories[category]);
8045       Fset (AREF (Vcoding_category_table, category), args[i]);
8046     }
8047
8048   /* Now we have decided top J priorities.  Reflect the order of the
8049      original priorities to the remaining priorities.  */
8050
8051   for (i = j, j = 0; i < coding_category_max; i++, j++)
8052     {
8053       while (j < coding_category_max
8054              && changed[coding_priorities[j]])
8055         j++;
8056       if (j == coding_category_max)
8057         abort ();
8058       priorities[i] = coding_priorities[j];
8059     }
8060
8061   bcopy (priorities, coding_priorities, sizeof priorities);
8062
8063   /* Update `coding-category-list'.  */
8064   Vcoding_category_list = Qnil;
8065   for (i = coding_category_max - 1; i >= 0; i--)
8066     Vcoding_category_list
8067       = Fcons (AREF (Vcoding_category_table, priorities[i]),
8068                Vcoding_category_list);
8069
8070   return Qnil;
8071 }
8072
8073 DEFUN ("coding-system-priority-list", Fcoding_system_priority_list,
8074        Scoding_system_priority_list, 0, 1, 0,
8075        doc: /* Return a list of coding systems ordered by their priorities.
8076 HIGHESTP non-nil means just return the highest priority one.  */)
8077      (highestp)
8078      Lisp_Object highestp;
8079 {
8080   int i;
8081   Lisp_Object val;
8082
8083   for (i = 0, val = Qnil; i < coding_category_max; i++)
8084     {
8085       enum coding_category category = coding_priorities[i];
8086       int id = coding_categories[category].id;
8087       Lisp_Object attrs;
8088
8089       if (id < 0)
8090         continue;
8091       attrs = CODING_ID_ATTRS (id);
8092       if (! NILP (highestp))
8093         return CODING_ATTR_BASE_NAME (attrs);
8094       val = Fcons (CODING_ATTR_BASE_NAME (attrs), val);
8095     }
8096   return Fnreverse (val);
8097 }
8098
8099 static char *suffixes[] = { "-unix", "-dos", "-mac" };
8100
8101 static Lisp_Object
8102 make_subsidiaries (base)
8103      Lisp_Object base;
8104 {
8105   Lisp_Object subsidiaries;
8106   int base_name_len = SBYTES (SYMBOL_NAME (base));
8107   char *buf = (char *) alloca (base_name_len + 6);
8108   int i;
8109
8110   bcopy (SDATA (SYMBOL_NAME (base)), buf, base_name_len);
8111   subsidiaries = Fmake_vector (make_number (3), Qnil);
8112   for (i = 0; i < 3; i++)
8113     {
8114       bcopy (suffixes[i], buf + base_name_len, strlen (suffixes[i]) + 1);
8115       ASET (subsidiaries, i, intern (buf));
8116     }
8117   return subsidiaries;
8118 }
8119
8120
8121 DEFUN ("define-coding-system-internal", Fdefine_coding_system_internal,
8122        Sdefine_coding_system_internal, coding_arg_max, MANY, 0,
8123        doc: /* For internal use only.
8124 usage: (define-coding-system-internal ...)  */)
8125      (nargs, args)
8126      int nargs;
8127      Lisp_Object *args;
8128 {
8129   Lisp_Object name;
8130   Lisp_Object spec_vec;         /* [ ATTRS ALIASE EOL_TYPE ] */
8131   Lisp_Object attrs;            /* Vector of attributes.  */
8132   Lisp_Object eol_type;
8133   Lisp_Object aliases;
8134   Lisp_Object coding_type, charset_list, safe_charsets;
8135   enum coding_category category;
8136   Lisp_Object tail, val;
8137   int max_charset_id = 0;
8138   int i;
8139
8140   if (nargs < coding_arg_max)
8141     goto short_args;
8142
8143   attrs = Fmake_vector (make_number (coding_attr_last_index), Qnil);
8144
8145   name = args[coding_arg_name];
8146   CHECK_SYMBOL (name);
8147   CODING_ATTR_BASE_NAME (attrs) = name;
8148
8149   val = args[coding_arg_mnemonic];
8150   if (! STRINGP (val))
8151     CHECK_CHARACTER (val);
8152   CODING_ATTR_MNEMONIC (attrs) = val;
8153
8154   coding_type = args[coding_arg_coding_type];
8155   CHECK_SYMBOL (coding_type);
8156   CODING_ATTR_TYPE (attrs) = coding_type;
8157
8158   charset_list = args[coding_arg_charset_list];
8159   if (SYMBOLP (charset_list))
8160     {
8161       if (EQ (charset_list, Qiso_2022))
8162         {
8163           if (! EQ (coding_type, Qiso_2022))
8164             error ("Invalid charset-list");
8165           charset_list = Viso_2022_charset_list;
8166         }
8167       else if (EQ (charset_list, Qemacs_mule))
8168         {
8169           if (! EQ (coding_type, Qemacs_mule))
8170             error ("Invalid charset-list");
8171           charset_list = Vemacs_mule_charset_list;
8172         }
8173       for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
8174         if (max_charset_id < XFASTINT (XCAR (tail)))
8175           max_charset_id = XFASTINT (XCAR (tail));
8176     }
8177   else
8178     {
8179       charset_list = Fcopy_sequence (charset_list);
8180       for (tail = charset_list; !NILP (tail); tail = Fcdr (tail))
8181         {
8182           struct charset *charset;
8183
8184           val = Fcar (tail);
8185           CHECK_CHARSET_GET_CHARSET (val, charset);
8186           if (EQ (coding_type, Qiso_2022)
8187               ? CHARSET_ISO_FINAL (charset) < 0
8188               : EQ (coding_type, Qemacs_mule)
8189               ? CHARSET_EMACS_MULE_ID (charset) < 0
8190               : 0)
8191             error ("Can't handle charset `%s'",
8192                    SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
8193
8194           XSETCAR (tail, make_number (charset->id));
8195           if (max_charset_id < charset->id)
8196             max_charset_id = charset->id;
8197         }
8198     }
8199   CODING_ATTR_CHARSET_LIST (attrs) = charset_list;
8200
8201   safe_charsets = Fmake_string (make_number (max_charset_id + 1),
8202                                 make_number (255));
8203   for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
8204     SSET (safe_charsets, XFASTINT (XCAR (tail)), 0);
8205   CODING_ATTR_SAFE_CHARSETS (attrs) = safe_charsets;
8206
8207   CODING_ATTR_ASCII_COMPAT (attrs) = args[coding_arg_ascii_compatible_p];
8208
8209   val = args[coding_arg_decode_translation_table];
8210   if (! CHAR_TABLE_P (val) && ! CONSP (val))
8211     CHECK_SYMBOL (val);
8212   CODING_ATTR_DECODE_TBL (attrs) = val;
8213
8214   val = args[coding_arg_encode_translation_table];
8215   if (! CHAR_TABLE_P (val) && ! CONSP (val))
8216     CHECK_SYMBOL (val);
8217   CODING_ATTR_ENCODE_TBL (attrs) = val;
8218
8219   val = args[coding_arg_post_read_conversion];
8220   CHECK_SYMBOL (val);
8221   CODING_ATTR_POST_READ (attrs) = val;
8222
8223   val = args[coding_arg_pre_write_conversion];
8224   CHECK_SYMBOL (val);
8225   CODING_ATTR_PRE_WRITE (attrs) = val;
8226
8227   val = args[coding_arg_default_char];
8228   if (NILP (val))
8229     CODING_ATTR_DEFAULT_CHAR (attrs) = make_number (' ');
8230   else
8231     {
8232       CHECK_CHARACTER (val);
8233       CODING_ATTR_DEFAULT_CHAR (attrs) = val;
8234     }
8235
8236   val = args[coding_arg_for_unibyte];
8237   CODING_ATTR_FOR_UNIBYTE (attrs) = NILP (val) ? Qnil : Qt;
8238
8239   val = args[coding_arg_plist];
8240   CHECK_LIST (val);
8241   CODING_ATTR_PLIST (attrs) = val;
8242
8243   if (EQ (coding_type, Qcharset))
8244     {
8245       /* Generate a lisp vector of 256 elements.  Each element is nil,
8246          integer, or a list of charset IDs.
8247
8248          If Nth element is nil, the byte code N is invalid in this
8249          coding system.
8250
8251          If Nth element is a number NUM, N is the first byte of a
8252          charset whose ID is NUM.
8253
8254          If Nth element is a list of charset IDs, N is the first byte
8255          of one of them.  The list is sorted by dimensions of the
8256          charsets.  A charset of smaller dimension comes firtst. */
8257       val = Fmake_vector (make_number (256), Qnil);
8258
8259       for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
8260         {
8261           struct charset *charset = CHARSET_FROM_ID (XFASTINT (XCAR (tail)));
8262           int dim = CHARSET_DIMENSION (charset);
8263           int idx = (dim - 1) * 4;
8264
8265           if (CHARSET_ASCII_COMPATIBLE_P (charset))
8266             CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
8267
8268           for (i = charset->code_space[idx];
8269                i <= charset->code_space[idx + 1]; i++)
8270             {
8271               Lisp_Object tmp, tmp2;
8272               int dim2;
8273
8274               tmp = AREF (val, i);
8275               if (NILP (tmp))
8276                 tmp = XCAR (tail);
8277               else if (NUMBERP (tmp))
8278                 {
8279                   dim2 = CHARSET_DIMENSION (CHARSET_FROM_ID (XFASTINT (tmp)));
8280                   if (dim < dim2)
8281                     tmp = Fcons (XCAR (tail), Fcons (tmp, Qnil));
8282                   else
8283                     tmp = Fcons (tmp, Fcons (XCAR (tail), Qnil));
8284                 }
8285               else
8286                 {
8287                   for (tmp2 = tmp; CONSP (tmp2); tmp2 = XCDR (tmp2))
8288                     {
8289                       dim2 = CHARSET_DIMENSION (CHARSET_FROM_ID (XFASTINT (XCAR (tmp2))));
8290                       if (dim < dim2)
8291                         break;
8292                     }
8293                   if (NILP (tmp2))
8294                     tmp = nconc2 (tmp, Fcons (XCAR (tail), Qnil));
8295                   else
8296                     {
8297                       XSETCDR (tmp2, Fcons (XCAR (tmp2), XCDR (tmp2)));
8298                       XSETCAR (tmp2, XCAR (tail));
8299                     }
8300                 }
8301               ASET (val, i, tmp);
8302             }
8303         }
8304       ASET (attrs, coding_attr_charset_valids, val);
8305       category = coding_category_charset;
8306     }
8307   else if (EQ (coding_type, Qccl))
8308     {
8309       Lisp_Object valids;
8310
8311       if (nargs < coding_arg_ccl_max)
8312         goto short_args;
8313
8314       val = args[coding_arg_ccl_decoder];
8315       CHECK_CCL_PROGRAM (val);
8316       if (VECTORP (val))
8317         val = Fcopy_sequence (val);
8318       ASET (attrs, coding_attr_ccl_decoder, val);
8319
8320       val = args[coding_arg_ccl_encoder];
8321       CHECK_CCL_PROGRAM (val);
8322       if (VECTORP (val))
8323         val = Fcopy_sequence (val);
8324       ASET (attrs, coding_attr_ccl_encoder, val);
8325
8326       val = args[coding_arg_ccl_valids];
8327       valids = Fmake_string (make_number (256), make_number (0));
8328       for (tail = val; !NILP (tail); tail = Fcdr (tail))
8329         {
8330           int from, to;
8331
8332           val = Fcar (tail);
8333           if (INTEGERP (val))
8334             {
8335               from = to = XINT (val);
8336               if (from < 0 || from > 255)
8337                 args_out_of_range_3 (val, make_number (0), make_number (255));
8338             }
8339           else
8340             {
8341               CHECK_CONS (val);
8342               CHECK_NATNUM_CAR (val);
8343               CHECK_NATNUM_CDR (val);
8344               from = XINT (XCAR (val));
8345               if (from > 255)
8346                 args_out_of_range_3 (XCAR (val),
8347                                      make_number (0), make_number (255));
8348               to = XINT (XCDR (val));
8349               if (to < from || to > 255)
8350                 args_out_of_range_3 (XCDR (val),
8351                                      XCAR (val), make_number (255));
8352             }
8353           for (i = from; i <= to; i++)
8354             SSET (valids, i, 1);
8355         }
8356       ASET (attrs, coding_attr_ccl_valids, valids);
8357
8358       category = coding_category_ccl;
8359     }
8360   else if (EQ (coding_type, Qutf_16))
8361     {
8362       Lisp_Object bom, endian;
8363
8364       CODING_ATTR_ASCII_COMPAT (attrs) = Qnil;
8365
8366       if (nargs < coding_arg_utf16_max)
8367         goto short_args;
8368
8369       bom = args[coding_arg_utf16_bom];
8370       if (! NILP (bom) && ! EQ (bom, Qt))
8371         {
8372           CHECK_CONS (bom);
8373           val = XCAR (bom);
8374           CHECK_CODING_SYSTEM (val);
8375           val = XCDR (bom);
8376           CHECK_CODING_SYSTEM (val);
8377         }
8378       ASET (attrs, coding_attr_utf_16_bom, bom);
8379
8380       endian = args[coding_arg_utf16_endian];
8381       CHECK_SYMBOL (endian);
8382       if (NILP (endian))
8383         endian = Qbig;
8384       else if (! EQ (endian, Qbig) && ! EQ (endian, Qlittle))
8385         error ("Invalid endian: %s", SDATA (SYMBOL_NAME (endian)));
8386       ASET (attrs, coding_attr_utf_16_endian, endian);
8387
8388       category = (CONSP (bom)
8389                   ? coding_category_utf_16_auto
8390                   : NILP (bom)
8391                   ? (EQ (endian, Qbig)
8392                      ? coding_category_utf_16_be_nosig
8393                      : coding_category_utf_16_le_nosig)
8394                   : (EQ (endian, Qbig)
8395                      ? coding_category_utf_16_be
8396                      : coding_category_utf_16_le));
8397     }
8398   else if (EQ (coding_type, Qiso_2022))
8399     {
8400       Lisp_Object initial, reg_usage, request, flags;
8401       int i;
8402
8403       if (nargs < coding_arg_iso2022_max)
8404         goto short_args;
8405
8406       initial = Fcopy_sequence (args[coding_arg_iso2022_initial]);
8407       CHECK_VECTOR (initial);
8408       for (i = 0; i < 4; i++)
8409         {
8410           val = Faref (initial, make_number (i));
8411           if (! NILP (val))
8412             {
8413               struct charset *charset;
8414
8415               CHECK_CHARSET_GET_CHARSET (val, charset);
8416               ASET (initial, i, make_number (CHARSET_ID (charset)));
8417               if (i == 0 && CHARSET_ASCII_COMPATIBLE_P (charset))
8418                 CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
8419             }
8420           else
8421             ASET (initial, i, make_number (-1));
8422         }
8423
8424       reg_usage = args[coding_arg_iso2022_reg_usage];
8425       CHECK_CONS (reg_usage);
8426       CHECK_NUMBER_CAR (reg_usage);
8427       CHECK_NUMBER_CDR (reg_usage);
8428
8429       request = Fcopy_sequence (args[coding_arg_iso2022_request]);
8430       for (tail = request; ! NILP (tail); tail = Fcdr (tail))
8431         {
8432           int id;
8433           Lisp_Object tmp;
8434
8435           val = Fcar (tail);
8436           CHECK_CONS (val);
8437           tmp = XCAR (val);
8438           CHECK_CHARSET_GET_ID (tmp, id);
8439           CHECK_NATNUM_CDR (val);
8440           if (XINT (XCDR (val)) >= 4)
8441             error ("Invalid graphic register number: %d", XINT (XCDR (val)));
8442           XSETCAR (val, make_number (id));
8443         }
8444
8445       flags = args[coding_arg_iso2022_flags];
8446       CHECK_NATNUM (flags);
8447       i = XINT (flags);
8448       if (EQ (args[coding_arg_charset_list], Qiso_2022))
8449         flags = make_number (i | CODING_ISO_FLAG_FULL_SUPPORT);
8450
8451       ASET (attrs, coding_attr_iso_initial, initial);
8452       ASET (attrs, coding_attr_iso_usage, reg_usage);
8453       ASET (attrs, coding_attr_iso_request, request);
8454       ASET (attrs, coding_attr_iso_flags, flags);
8455       setup_iso_safe_charsets (attrs);
8456
8457       if (i & CODING_ISO_FLAG_SEVEN_BITS)
8458         category = ((i & (CODING_ISO_FLAG_LOCKING_SHIFT
8459                           | CODING_ISO_FLAG_SINGLE_SHIFT))
8460                     ? coding_category_iso_7_else
8461                     : EQ (args[coding_arg_charset_list], Qiso_2022)
8462                     ? coding_category_iso_7
8463                     : coding_category_iso_7_tight);
8464       else
8465         {
8466           int id = XINT (AREF (initial, 1));
8467
8468           category = (((i & CODING_ISO_FLAG_LOCKING_SHIFT)
8469                        || EQ (args[coding_arg_charset_list], Qiso_2022)
8470                        || id < 0)
8471                       ? coding_category_iso_8_else
8472                       : (CHARSET_DIMENSION (CHARSET_FROM_ID (id)) == 1)
8473                       ? coding_category_iso_8_1
8474                       : coding_category_iso_8_2);
8475         }
8476       if (category != coding_category_iso_8_1
8477           && category != coding_category_iso_8_2)
8478         CODING_ATTR_ASCII_COMPAT (attrs) = Qnil;
8479     }
8480   else if (EQ (coding_type, Qemacs_mule))
8481     {
8482       if (EQ (args[coding_arg_charset_list], Qemacs_mule))
8483         ASET (attrs, coding_attr_emacs_mule_full, Qt);
8484       CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
8485       category = coding_category_emacs_mule;
8486     }
8487   else if (EQ (coding_type, Qshift_jis))
8488     {
8489
8490       struct charset *charset;
8491
8492       if (XINT (Flength (charset_list)) != 3
8493           && XINT (Flength (charset_list)) != 4)
8494         error ("There should be three or four charsets");
8495
8496       charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
8497       if (CHARSET_DIMENSION (charset) != 1)
8498         error ("Dimension of charset %s is not one",
8499                SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
8500       if (CHARSET_ASCII_COMPATIBLE_P (charset))
8501         CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
8502
8503       charset_list = XCDR (charset_list);
8504       charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
8505       if (CHARSET_DIMENSION (charset) != 1)
8506         error ("Dimension of charset %s is not one",
8507                SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
8508
8509       charset_list = XCDR (charset_list);
8510       charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
8511       if (CHARSET_DIMENSION (charset) != 2)
8512         error ("Dimension of charset %s is not two",
8513                SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
8514
8515       charset_list = XCDR (charset_list);
8516       if (! NILP (charset_list))
8517         {
8518           charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
8519           if (CHARSET_DIMENSION (charset) != 2)
8520             error ("Dimension of charset %s is not two",
8521                    SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
8522         }
8523
8524       category = coding_category_sjis;
8525       Vsjis_coding_system = name;
8526     }
8527   else if (EQ (coding_type, Qbig5))
8528     {
8529       struct charset *charset;
8530
8531       if (XINT (Flength (charset_list)) != 2)
8532         error ("There should be just two charsets");
8533
8534       charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
8535       if (CHARSET_DIMENSION (charset) != 1)
8536         error ("Dimension of charset %s is not one",
8537                SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
8538       if (CHARSET_ASCII_COMPATIBLE_P (charset))
8539         CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
8540
8541       charset_list = XCDR (charset_list);
8542       charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
8543       if (CHARSET_DIMENSION (charset) != 2)
8544         error ("Dimension of charset %s is not two",
8545                SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
8546
8547       category = coding_category_big5;
8548       Vbig5_coding_system = name;
8549     }
8550   else if (EQ (coding_type, Qraw_text))
8551     {
8552       category = coding_category_raw_text;
8553       CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
8554     }
8555   else if (EQ (coding_type, Qutf_8))
8556     {
8557       category = coding_category_utf_8;
8558       CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
8559     }
8560   else if (EQ (coding_type, Qundecided))
8561     category = coding_category_undecided;
8562   else
8563     error ("Invalid coding system type: %s",
8564            SDATA (SYMBOL_NAME (coding_type)));
8565
8566   CODING_ATTR_CATEGORY (attrs) = make_number (category);
8567   CODING_ATTR_PLIST (attrs)
8568     = Fcons (QCcategory, Fcons (AREF (Vcoding_category_table, category),
8569                                 CODING_ATTR_PLIST (attrs)));
8570
8571   eol_type = args[coding_arg_eol_type];
8572   if (! NILP (eol_type)
8573       && ! EQ (eol_type, Qunix)
8574       && ! EQ (eol_type, Qdos)
8575       && ! EQ (eol_type, Qmac))
8576     error ("Invalid eol-type");
8577
8578   aliases = Fcons (name, Qnil);
8579
8580   if (NILP (eol_type))
8581     {
8582       eol_type = make_subsidiaries (name);
8583       for (i = 0; i < 3; i++)
8584         {
8585           Lisp_Object this_spec, this_name, this_aliases, this_eol_type;
8586
8587           this_name = AREF (eol_type, i);
8588           this_aliases = Fcons (this_name, Qnil);
8589           this_eol_type = (i == 0 ? Qunix : i == 1 ? Qdos : Qmac);
8590           this_spec = Fmake_vector (make_number (3), attrs);
8591           ASET (this_spec, 1, this_aliases);
8592           ASET (this_spec, 2, this_eol_type);
8593           Fputhash (this_name, this_spec, Vcoding_system_hash_table);
8594           Vcoding_system_list = Fcons (this_name, Vcoding_system_list);
8595           Vcoding_system_alist = Fcons (Fcons (Fsymbol_name (this_name), Qnil),
8596                                         Vcoding_system_alist);
8597         }
8598     }
8599
8600   spec_vec = Fmake_vector (make_number (3), attrs);
8601   ASET (spec_vec, 1, aliases);
8602   ASET (spec_vec, 2, eol_type);
8603
8604   Fputhash (name, spec_vec, Vcoding_system_hash_table);
8605   Vcoding_system_list = Fcons (name, Vcoding_system_list);
8606   Vcoding_system_alist = Fcons (Fcons (Fsymbol_name (name), Qnil),
8607                                 Vcoding_system_alist);
8608
8609   {
8610     int id = coding_categories[category].id;
8611
8612     if (id < 0 || EQ (name, CODING_ID_NAME (id)))
8613       setup_coding_system (name, &coding_categories[category]);
8614   }
8615
8616   return Qnil;
8617
8618  short_args:
8619   return Fsignal (Qwrong_number_of_arguments,
8620                   Fcons (intern ("define-coding-system-internal"),
8621                          make_number (nargs)));
8622 }
8623
8624
8625 DEFUN ("coding-system-put", Fcoding_system_put, Scoding_system_put,
8626        3, 3, 0,
8627        doc: /* Change value in CODING-SYSTEM's property list PROP to VAL.  */)
8628   (coding_system, prop, val)
8629      Lisp_Object coding_system, prop, val;
8630 {
8631   Lisp_Object spec, attrs, plist;
8632
8633   CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
8634   attrs = AREF (spec, 0);
8635   if (EQ (prop, QCmnemonic))
8636     {
8637       if (! STRINGP (val))
8638         CHECK_CHARACTER (val);
8639       CODING_ATTR_MNEMONIC (attrs) = val;
8640     }
8641   else if (EQ (prop, QCdefalut_char))
8642     {
8643       if (NILP (val))
8644         val = make_number (' ');
8645       else
8646         CHECK_CHARACTER (val);
8647       CODING_ATTR_DEFAULT_CHAR (attrs) = val;
8648     }
8649   else if (EQ (prop, QCdecode_translation_table))
8650     {
8651       if (! CHAR_TABLE_P (val) && ! CONSP (val))
8652         CHECK_SYMBOL (val);
8653       CODING_ATTR_DECODE_TBL (attrs) = val;
8654     }
8655   else if (EQ (prop, QCencode_translation_table))
8656     {
8657       if (! CHAR_TABLE_P (val) && ! CONSP (val))
8658         CHECK_SYMBOL (val);
8659       CODING_ATTR_ENCODE_TBL (attrs) = val;
8660     }
8661   else if (EQ (prop, QCpost_read_conversion))
8662     {
8663       CHECK_SYMBOL (val);
8664       CODING_ATTR_POST_READ (attrs) = val;
8665     }
8666   else if (EQ (prop, QCpre_write_conversion))
8667     {
8668       CHECK_SYMBOL (val);
8669       CODING_ATTR_PRE_WRITE (attrs) = val;
8670     }
8671
8672   CODING_ATTR_PLIST (attrs)
8673     = Fplist_put (CODING_ATTR_PLIST (attrs), prop, val);
8674   return val;
8675 }
8676
8677
8678 DEFUN ("define-coding-system-alias", Fdefine_coding_system_alias,
8679        Sdefine_coding_system_alias, 2, 2, 0,
8680        doc: /* Define ALIAS as an alias for CODING-SYSTEM.  */)
8681      (alias, coding_system)
8682      Lisp_Object alias, coding_system;
8683 {
8684   Lisp_Object spec, aliases, eol_type;
8685
8686   CHECK_SYMBOL (alias);
8687   CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
8688   aliases = AREF (spec, 1);
8689   /* ALISES should be a list of length more than zero, and the first
8690      element is a base coding system.  Append ALIAS at the tail of the
8691      list.  */
8692   while (!NILP (XCDR (aliases)))
8693     aliases = XCDR (aliases);
8694   XSETCDR (aliases, Fcons (alias, Qnil));
8695
8696   eol_type = AREF (spec, 2);
8697   if (VECTORP (eol_type))
8698     {
8699       Lisp_Object subsidiaries;
8700       int i;
8701
8702       subsidiaries = make_subsidiaries (alias);
8703       for (i = 0; i < 3; i++)
8704         Fdefine_coding_system_alias (AREF (subsidiaries, i),
8705                                      AREF (eol_type, i));
8706     }
8707
8708   Fputhash (alias, spec, Vcoding_system_hash_table);
8709   Vcoding_system_list = Fcons (alias, Vcoding_system_list);
8710   Vcoding_system_alist = Fcons (Fcons (Fsymbol_name (alias), Qnil),
8711                                 Vcoding_system_alist);
8712
8713   return Qnil;
8714 }
8715
8716 DEFUN ("coding-system-base", Fcoding_system_base, Scoding_system_base,
8717        1, 1, 0,
8718        doc: /* Return the base of CODING-SYSTEM.
8719 Any alias or subsidiary coding system is not a base coding system.  */)
8720   (coding_system)
8721      Lisp_Object coding_system;
8722 {
8723   Lisp_Object spec, attrs;
8724
8725   if (NILP (coding_system))
8726     return (Qno_conversion);
8727   CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
8728   attrs = AREF (spec, 0);
8729   return CODING_ATTR_BASE_NAME (attrs);
8730 }
8731
8732 DEFUN ("coding-system-plist", Fcoding_system_plist, Scoding_system_plist,
8733        1, 1, 0,
8734        doc: "Return the property list of CODING-SYSTEM.")
8735      (coding_system)
8736      Lisp_Object coding_system;
8737 {
8738   Lisp_Object spec, attrs;
8739
8740   if (NILP (coding_system))
8741     coding_system = Qno_conversion;
8742   CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
8743   attrs = AREF (spec, 0);
8744   return CODING_ATTR_PLIST (attrs);
8745 }
8746
8747
8748 DEFUN ("coding-system-aliases", Fcoding_system_aliases, Scoding_system_aliases,
8749        1, 1, 0,
8750        doc: /* Return the list of aliases of CODING-SYSTEM.  */)
8751      (coding_system)
8752      Lisp_Object coding_system;
8753 {
8754   Lisp_Object spec;
8755
8756   if (NILP (coding_system))
8757     coding_system = Qno_conversion;
8758   CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
8759   return AREF (spec, 1);
8760 }
8761
8762 DEFUN ("coding-system-eol-type", Fcoding_system_eol_type,
8763        Scoding_system_eol_type, 1, 1, 0,
8764        doc: /* Return eol-type of CODING-SYSTEM.
8765 An eol-type is integer 0, 1, 2, or a vector of coding systems.
8766
8767 Integer values 0, 1, and 2 indicate a format of end-of-line; LF, CRLF,
8768 and CR respectively.
8769
8770 A vector value indicates that a format of end-of-line should be
8771 detected automatically.  Nth element of the vector is the subsidiary
8772 coding system whose eol-type is N.  */)
8773      (coding_system)
8774      Lisp_Object coding_system;
8775 {
8776   Lisp_Object spec, eol_type;
8777   int n;
8778
8779   if (NILP (coding_system))
8780     coding_system = Qno_conversion;
8781   if (! CODING_SYSTEM_P (coding_system))
8782     return Qnil;
8783   spec = CODING_SYSTEM_SPEC (coding_system);
8784   eol_type = AREF (spec, 2);
8785   if (VECTORP (eol_type))
8786     return Fcopy_sequence (eol_type);
8787   n = EQ (eol_type, Qunix) ? 0 : EQ (eol_type, Qdos) ? 1 : 2;
8788   return make_number (n);
8789 }
8790
8791 #endif /* emacs */
8792
8793 \f
8794 /*** 9. Post-amble ***/
8795
8796 void
8797 init_coding_once ()
8798 {
8799   int i;
8800
8801   for (i = 0; i < coding_category_max; i++)
8802     {
8803       coding_categories[i].id = -1;
8804       coding_priorities[i] = i;
8805     }
8806
8807   /* ISO2022 specific initialize routine.  */
8808   for (i = 0; i < 0x20; i++)
8809     iso_code_class[i] = ISO_control_0;
8810   for (i = 0x21; i < 0x7F; i++)
8811     iso_code_class[i] = ISO_graphic_plane_0;
8812   for (i = 0x80; i < 0xA0; i++)
8813     iso_code_class[i] = ISO_control_1;
8814   for (i = 0xA1; i < 0xFF; i++)
8815     iso_code_class[i] = ISO_graphic_plane_1;
8816   iso_code_class[0x20] = iso_code_class[0x7F] = ISO_0x20_or_0x7F;
8817   iso_code_class[0xA0] = iso_code_class[0xFF] = ISO_0xA0_or_0xFF;
8818   iso_code_class[ISO_CODE_SO] = ISO_shift_out;
8819   iso_code_class[ISO_CODE_SI] = ISO_shift_in;
8820   iso_code_class[ISO_CODE_SS2_7] = ISO_single_shift_2_7;
8821   iso_code_class[ISO_CODE_ESC] = ISO_escape;
8822   iso_code_class[ISO_CODE_SS2] = ISO_single_shift_2;
8823   iso_code_class[ISO_CODE_SS3] = ISO_single_shift_3;
8824   iso_code_class[ISO_CODE_CSI] = ISO_control_sequence_introducer;
8825
8826   for (i = 0; i < 256; i++)
8827     {
8828       emacs_mule_bytes[i] = 1;
8829     }
8830   emacs_mule_bytes[EMACS_MULE_LEADING_CODE_PRIVATE_11] = 3;
8831   emacs_mule_bytes[EMACS_MULE_LEADING_CODE_PRIVATE_12] = 3;
8832   emacs_mule_bytes[EMACS_MULE_LEADING_CODE_PRIVATE_21] = 4;
8833   emacs_mule_bytes[EMACS_MULE_LEADING_CODE_PRIVATE_22] = 4;
8834 }
8835
8836 #ifdef emacs
8837
8838 void
8839 syms_of_coding ()
8840 {
8841   staticpro (&Vcoding_system_hash_table);
8842   {
8843     Lisp_Object args[2];
8844     args[0] = QCtest;
8845     args[1] = Qeq;
8846     Vcoding_system_hash_table = Fmake_hash_table (2, args);
8847   }
8848
8849   staticpro (&Vsjis_coding_system);
8850   Vsjis_coding_system = Qnil;
8851
8852   staticpro (&Vbig5_coding_system);
8853   Vbig5_coding_system = Qnil;
8854
8855   staticpro (&Vcode_conversion_reused_workbuf);
8856   Vcode_conversion_reused_workbuf = Qnil;
8857
8858   staticpro (&Vcode_conversion_workbuf_name);
8859   Vcode_conversion_workbuf_name = build_string (" *code-conversion-work*");
8860
8861   reused_workbuf_in_use = 0;
8862
8863   DEFSYM (Qcharset, "charset");
8864   DEFSYM (Qtarget_idx, "target-idx");
8865   DEFSYM (Qcoding_system_history, "coding-system-history");
8866   Fset (Qcoding_system_history, Qnil);
8867
8868   /* Target FILENAME is the first argument.  */
8869   Fput (Qinsert_file_contents, Qtarget_idx, make_number (0));
8870   /* Target FILENAME is the third argument.  */
8871   Fput (Qwrite_region, Qtarget_idx, make_number (2));
8872
8873   DEFSYM (Qcall_process, "call-process");
8874   /* Target PROGRAM is the first argument.  */
8875   Fput (Qcall_process, Qtarget_idx, make_number (0));
8876
8877   DEFSYM (Qcall_process_region, "call-process-region");
8878   /* Target PROGRAM is the third argument.  */
8879   Fput (Qcall_process_region, Qtarget_idx, make_number (2));
8880
8881   DEFSYM (Qstart_process, "start-process");
8882   /* Target PROGRAM is the third argument.  */
8883   Fput (Qstart_process, Qtarget_idx, make_number (2));
8884
8885   DEFSYM (Qopen_network_stream, "open-network-stream");
8886   /* Target SERVICE is the fourth argument.  */
8887   Fput (Qopen_network_stream, Qtarget_idx, make_number (3));
8888
8889   DEFSYM (Qcoding_system, "coding-system");
8890   DEFSYM (Qcoding_aliases, "coding-aliases");
8891
8892   DEFSYM (Qeol_type, "eol-type");
8893   DEFSYM (Qunix, "unix");
8894   DEFSYM (Qdos, "dos");
8895
8896   DEFSYM (Qbuffer_file_coding_system, "buffer-file-coding-system");
8897   DEFSYM (Qpost_read_conversion, "post-read-conversion");
8898   DEFSYM (Qpre_write_conversion, "pre-write-conversion");
8899   DEFSYM (Qdefault_char, "default-char");
8900   DEFSYM (Qundecided, "undecided");
8901   DEFSYM (Qno_conversion, "no-conversion");
8902   DEFSYM (Qraw_text, "raw-text");
8903
8904   DEFSYM (Qiso_2022, "iso-2022");
8905
8906   DEFSYM (Qutf_8, "utf-8");
8907   DEFSYM (Qutf_8_emacs, "utf-8-emacs");
8908
8909   DEFSYM (Qutf_16, "utf-16");
8910   DEFSYM (Qbig, "big");
8911   DEFSYM (Qlittle, "little");
8912
8913   DEFSYM (Qshift_jis, "shift-jis");
8914   DEFSYM (Qbig5, "big5");
8915
8916   DEFSYM (Qcoding_system_p, "coding-system-p");
8917
8918   DEFSYM (Qcoding_system_error, "coding-system-error");
8919   Fput (Qcoding_system_error, Qerror_conditions,
8920         Fcons (Qcoding_system_error, Fcons (Qerror, Qnil)));
8921   Fput (Qcoding_system_error, Qerror_message,
8922         build_string ("Invalid coding system"));
8923
8924   /* Intern this now in case it isn't already done.
8925      Setting this variable twice is harmless.
8926      But don't staticpro it here--that is done in alloc.c.  */
8927   Qchar_table_extra_slots = intern ("char-table-extra-slots");
8928
8929   DEFSYM (Qtranslation_table, "translation-table");
8930   Fput (Qtranslation_table, Qchar_table_extra_slots, make_number (1));
8931   DEFSYM (Qtranslation_table_id, "translation-table-id");
8932   DEFSYM (Qtranslation_table_for_decode, "translation-table-for-decode");
8933   DEFSYM (Qtranslation_table_for_encode, "translation-table-for-encode");
8934
8935   DEFSYM (Qvalid_codes, "valid-codes");
8936
8937   DEFSYM (Qemacs_mule, "emacs-mule");
8938
8939   DEFSYM (QCcategory, ":category");
8940   DEFSYM (QCmnemonic, ":mnemonic");
8941   DEFSYM (QCdefalut_char, ":default-char");
8942   DEFSYM (QCdecode_translation_table, ":decode-translation-table");
8943   DEFSYM (QCencode_translation_table, ":encode-translation-table");
8944   DEFSYM (QCpost_read_conversion, ":post-read-conversion");
8945   DEFSYM (QCpre_write_conversion, ":pre-write-conversion");
8946
8947   Vcoding_category_table
8948     = Fmake_vector (make_number (coding_category_max), Qnil);
8949   staticpro (&Vcoding_category_table);
8950   /* Followings are target of code detection.  */
8951   ASET (Vcoding_category_table, coding_category_iso_7,
8952         intern ("coding-category-iso-7"));
8953   ASET (Vcoding_category_table, coding_category_iso_7_tight,
8954         intern ("coding-category-iso-7-tight"));
8955   ASET (Vcoding_category_table, coding_category_iso_8_1,
8956         intern ("coding-category-iso-8-1"));
8957   ASET (Vcoding_category_table, coding_category_iso_8_2,
8958         intern ("coding-category-iso-8-2"));
8959   ASET (Vcoding_category_table, coding_category_iso_7_else,
8960         intern ("coding-category-iso-7-else"));
8961   ASET (Vcoding_category_table, coding_category_iso_8_else,
8962         intern ("coding-category-iso-8-else"));
8963   ASET (Vcoding_category_table, coding_category_utf_8,
8964         intern ("coding-category-utf-8"));
8965   ASET (Vcoding_category_table, coding_category_utf_16_be,
8966         intern ("coding-category-utf-16-be"));
8967   ASET (Vcoding_category_table, coding_category_utf_16_auto,
8968         intern ("coding-category-utf-16-auto"));
8969   ASET (Vcoding_category_table, coding_category_utf_16_le,
8970         intern ("coding-category-utf-16-le"));
8971   ASET (Vcoding_category_table, coding_category_utf_16_be_nosig,
8972         intern ("coding-category-utf-16-be-nosig"));
8973   ASET (Vcoding_category_table, coding_category_utf_16_le_nosig,
8974         intern ("coding-category-utf-16-le-nosig"));
8975   ASET (Vcoding_category_table, coding_category_charset,
8976         intern ("coding-category-charset"));
8977   ASET (Vcoding_category_table, coding_category_sjis,
8978         intern ("coding-category-sjis"));
8979   ASET (Vcoding_category_table, coding_category_big5,
8980         intern ("coding-category-big5"));
8981   ASET (Vcoding_category_table, coding_category_ccl,
8982         intern ("coding-category-ccl"));
8983   ASET (Vcoding_category_table, coding_category_emacs_mule,
8984         intern ("coding-category-emacs-mule"));
8985   /* Followings are NOT target of code detection.  */
8986   ASET (Vcoding_category_table, coding_category_raw_text,
8987         intern ("coding-category-raw-text"));
8988   ASET (Vcoding_category_table, coding_category_undecided,
8989         intern ("coding-category-undecided"));
8990
8991   DEFSYM (Qinsufficient_source, "insufficient-source");
8992   DEFSYM (Qinconsistent_eol, "inconsistent-eol");
8993   DEFSYM (Qinvalid_source, "invalid-source");
8994   DEFSYM (Qinterrupted, "interrupted");
8995   DEFSYM (Qinsufficient_memory, "insufficient-memory");
8996
8997   defsubr (&Scoding_system_p);
8998   defsubr (&Sread_coding_system);
8999   defsubr (&Sread_non_nil_coding_system);
9000   defsubr (&Scheck_coding_system);
9001   defsubr (&Sdetect_coding_region);
9002   defsubr (&Sdetect_coding_string);
9003   defsubr (&Sfind_coding_systems_region_internal);
9004   defsubr (&Sunencodable_char_position);
9005   defsubr (&Scheck_coding_systems_region);
9006   defsubr (&Sdecode_coding_region);
9007   defsubr (&Sencode_coding_region);
9008   defsubr (&Sdecode_coding_string);
9009   defsubr (&Sencode_coding_string);
9010   defsubr (&Sdecode_sjis_char);
9011   defsubr (&Sencode_sjis_char);
9012   defsubr (&Sdecode_big5_char);
9013   defsubr (&Sencode_big5_char);
9014   defsubr (&Sset_terminal_coding_system_internal);
9015   defsubr (&Sset_safe_terminal_coding_system_internal);
9016   defsubr (&Sterminal_coding_system);
9017   defsubr (&Sset_keyboard_coding_system_internal);
9018   defsubr (&Skeyboard_coding_system);
9019   defsubr (&Sfind_operation_coding_system);
9020   defsubr (&Sset_coding_system_priority);
9021   defsubr (&Sdefine_coding_system_internal);
9022   defsubr (&Sdefine_coding_system_alias);
9023   defsubr (&Scoding_system_put);
9024   defsubr (&Scoding_system_base);
9025   defsubr (&Scoding_system_plist);
9026   defsubr (&Scoding_system_aliases);
9027   defsubr (&Scoding_system_eol_type);
9028   defsubr (&Scoding_system_priority_list);
9029
9030   DEFVAR_LISP ("coding-system-list", &Vcoding_system_list,
9031                doc: /* List of coding systems.
9032
9033 Do not alter the value of this variable manually.  This variable should be
9034 updated by the functions `define-coding-system' and
9035 `define-coding-system-alias'.  */);
9036   Vcoding_system_list = Qnil;
9037
9038   DEFVAR_LISP ("coding-system-alist", &Vcoding_system_alist,
9039                doc: /* Alist of coding system names.
9040 Each element is one element list of coding system name.
9041 This variable is given to `completing-read' as TABLE argument.
9042
9043 Do not alter the value of this variable manually.  This variable should be
9044 updated by the functions `make-coding-system' and
9045 `define-coding-system-alias'.  */);
9046   Vcoding_system_alist = Qnil;
9047
9048   DEFVAR_LISP ("coding-category-list", &Vcoding_category_list,
9049                doc: /* List of coding-categories (symbols) ordered by priority.
9050
9051 On detecting a coding system, Emacs tries code detection algorithms
9052 associated with each coding-category one by one in this order.  When
9053 one algorithm agrees with a byte sequence of source text, the coding
9054 system bound to the corresponding coding-category is selected.  */);
9055   {
9056     int i;
9057
9058     Vcoding_category_list = Qnil;
9059     for (i = coding_category_max - 1; i >= 0; i--)
9060       Vcoding_category_list
9061         = Fcons (XVECTOR (Vcoding_category_table)->contents[i],
9062                  Vcoding_category_list);
9063   }
9064
9065   DEFVAR_LISP ("coding-system-for-read", &Vcoding_system_for_read,
9066                doc: /* Specify the coding system for read operations.
9067 It is useful to bind this variable with `let', but do not set it globally.
9068 If the value is a coding system, it is used for decoding on read operation.
9069 If not, an appropriate element is used from one of the coding system alists:
9070 There are three such tables, `file-coding-system-alist',
9071 `process-coding-system-alist', and `network-coding-system-alist'.  */);
9072   Vcoding_system_for_read = Qnil;
9073
9074   DEFVAR_LISP ("coding-system-for-write", &Vcoding_system_for_write,
9075                doc: /* Specify the coding system for write operations.
9076 Programs bind this variable with `let', but you should not set it globally.
9077 If the value is a coding system, it is used for encoding of output,
9078 when writing it to a file and when sending it to a file or subprocess.
9079
9080 If this does not specify a coding system, an appropriate element
9081 is used from one of the coding system alists:
9082 There are three such tables, `file-coding-system-alist',
9083 `process-coding-system-alist', and `network-coding-system-alist'.
9084 For output to files, if the above procedure does not specify a coding system,
9085 the value of `buffer-file-coding-system' is used.  */);
9086   Vcoding_system_for_write = Qnil;
9087
9088   DEFVAR_LISP ("last-coding-system-used", &Vlast_coding_system_used,
9089                doc: /*
9090 Coding system used in the latest file or process I/O.  */);
9091   Vlast_coding_system_used = Qnil;
9092
9093   DEFVAR_LISP ("last-code-conversion-error", &Vlast_code_conversion_error,
9094                doc: /*
9095 Error status of the last code conversion.
9096
9097 When an error was detected in the last code conversion, this variable
9098 is set to one of the following symbols.
9099   `insufficient-source'
9100   `inconsistent-eol'
9101   `invalid-source'
9102   `interrupted'
9103   `insufficient-memory'
9104 When no error was detected, the value doesn't change.  So, to check
9105 the error status of a code conversion by this variable, you must
9106 explicitly set this variable to nil before performing code
9107 conversion.  */);
9108   Vlast_code_conversion_error = Qnil;
9109
9110   DEFVAR_BOOL ("inhibit-eol-conversion", &inhibit_eol_conversion,
9111                doc: /*
9112 *Non-nil means always inhibit code conversion of end-of-line format.
9113 See info node `Coding Systems' and info node `Text and Binary' concerning
9114 such conversion.  */);
9115   inhibit_eol_conversion = 0;
9116
9117   DEFVAR_BOOL ("inherit-process-coding-system", &inherit_process_coding_system,
9118                doc: /*
9119 Non-nil means process buffer inherits coding system of process output.
9120 Bind it to t if the process output is to be treated as if it were a file
9121 read from some filesystem.  */);
9122   inherit_process_coding_system = 0;
9123
9124   DEFVAR_LISP ("file-coding-system-alist", &Vfile_coding_system_alist,
9125                doc: /*
9126 Alist to decide a coding system to use for a file I/O operation.
9127 The format is ((PATTERN . VAL) ...),
9128 where PATTERN is a regular expression matching a file name,
9129 VAL is a coding system, a cons of coding systems, or a function symbol.
9130 If VAL is a coding system, it is used for both decoding and encoding
9131 the file contents.
9132 If VAL is a cons of coding systems, the car part is used for decoding,
9133 and the cdr part is used for encoding.
9134 If VAL is a function symbol, the function must return a coding system
9135 or a cons of coding systems which are used as above.  The function gets
9136 the arguments with which `find-operation-coding-systems' was called.
9137
9138 See also the function `find-operation-coding-system'
9139 and the variable `auto-coding-alist'.  */);
9140   Vfile_coding_system_alist = Qnil;
9141
9142   DEFVAR_LISP ("process-coding-system-alist", &Vprocess_coding_system_alist,
9143                doc: /*
9144 Alist to decide a coding system to use for a process I/O operation.
9145 The format is ((PATTERN . VAL) ...),
9146 where PATTERN is a regular expression matching a program name,
9147 VAL is a coding system, a cons of coding systems, or a function symbol.
9148 If VAL is a coding system, it is used for both decoding what received
9149 from the program and encoding what sent to the program.
9150 If VAL is a cons of coding systems, the car part is used for decoding,
9151 and the cdr part is used for encoding.
9152 If VAL is a function symbol, the function must return a coding system
9153 or a cons of coding systems which are used as above.
9154
9155 See also the function `find-operation-coding-system'.  */);
9156   Vprocess_coding_system_alist = Qnil;
9157
9158   DEFVAR_LISP ("network-coding-system-alist", &Vnetwork_coding_system_alist,
9159                doc: /*
9160 Alist to decide a coding system to use for a network I/O operation.
9161 The format is ((PATTERN . VAL) ...),
9162 where PATTERN is a regular expression matching a network service name
9163 or is a port number to connect to,
9164 VAL is a coding system, a cons of coding systems, or a function symbol.
9165 If VAL is a coding system, it is used for both decoding what received
9166 from the network stream and encoding what sent to the network stream.
9167 If VAL is a cons of coding systems, the car part is used for decoding,
9168 and the cdr part is used for encoding.
9169 If VAL is a function symbol, the function must return a coding system
9170 or a cons of coding systems which are used as above.
9171
9172 See also the function `find-operation-coding-system'.  */);
9173   Vnetwork_coding_system_alist = Qnil;
9174
9175   DEFVAR_LISP ("locale-coding-system", &Vlocale_coding_system,
9176                doc: /* Coding system to use with system messages.
9177 Also used for decoding keyboard input on X Window system.  */);
9178   Vlocale_coding_system = Qnil;
9179
9180   /* The eol mnemonics are reset in startup.el system-dependently.  */
9181   DEFVAR_LISP ("eol-mnemonic-unix", &eol_mnemonic_unix,
9182                doc: /*
9183 *String displayed in mode line for UNIX-like (LF) end-of-line format.  */);
9184   eol_mnemonic_unix = build_string (":");
9185
9186   DEFVAR_LISP ("eol-mnemonic-dos", &eol_mnemonic_dos,
9187                doc: /*
9188 *String displayed in mode line for DOS-like (CRLF) end-of-line format.  */);
9189   eol_mnemonic_dos = build_string ("\\");
9190
9191   DEFVAR_LISP ("eol-mnemonic-mac", &eol_mnemonic_mac,
9192                doc: /*
9193 *String displayed in mode line for MAC-like (CR) end-of-line format.  */);
9194   eol_mnemonic_mac = build_string ("/");
9195
9196   DEFVAR_LISP ("eol-mnemonic-undecided", &eol_mnemonic_undecided,
9197                doc: /*
9198 *String displayed in mode line when end-of-line format is not yet determined.  */);
9199   eol_mnemonic_undecided = build_string (":");
9200
9201   DEFVAR_LISP ("enable-character-translation", &Venable_character_translation,
9202                doc: /*
9203 *Non-nil enables character translation while encoding and decoding.  */);
9204   Venable_character_translation = Qt;
9205
9206   DEFVAR_LISP ("standard-translation-table-for-decode",
9207                &Vstandard_translation_table_for_decode,
9208                doc: /* Table for translating characters while decoding.  */);
9209   Vstandard_translation_table_for_decode = Qnil;
9210
9211   DEFVAR_LISP ("standard-translation-table-for-encode",
9212                &Vstandard_translation_table_for_encode,
9213                doc: /* Table for translating characters while encoding.  */);
9214   Vstandard_translation_table_for_encode = Qnil;
9215
9216   DEFVAR_LISP ("charset-revision-table", &Vcharset_revision_table,
9217                doc: /* Alist of charsets vs revision numbers.
9218 While encoding, if a charset (car part of an element) is found,
9219 designate it with the escape sequence identifying revision (cdr part
9220 of the element).  */);
9221   Vcharset_revision_table = Qnil;
9222
9223   DEFVAR_LISP ("default-process-coding-system",
9224                &Vdefault_process_coding_system,
9225                doc: /* Cons of coding systems used for process I/O by default.
9226 The car part is used for decoding a process output,
9227 the cdr part is used for encoding a text to be sent to a process.  */);
9228   Vdefault_process_coding_system = Qnil;
9229
9230   DEFVAR_LISP ("latin-extra-code-table", &Vlatin_extra_code_table,
9231                doc: /*
9232 Table of extra Latin codes in the range 128..159 (inclusive).
9233 This is a vector of length 256.
9234 If Nth element is non-nil, the existence of code N in a file
9235 \(or output of subprocess) doesn't prevent it to be detected as
9236 a coding system of ISO 2022 variant which has a flag
9237 `accept-latin-extra-code' t (e.g. iso-latin-1) on reading a file
9238 or reading output of a subprocess.
9239 Only 128th through 159th elements has a meaning.  */);
9240   Vlatin_extra_code_table = Fmake_vector (make_number (256), Qnil);
9241
9242   DEFVAR_LISP ("select-safe-coding-system-function",
9243                &Vselect_safe_coding_system_function,
9244                doc: /*
9245 Function to call to select safe coding system for encoding a text.
9246
9247 If set, this function is called to force a user to select a proper
9248 coding system which can encode the text in the case that a default
9249 coding system used in each operation can't encode the text.
9250
9251 The default value is `select-safe-coding-system' (which see).  */);
9252   Vselect_safe_coding_system_function = Qnil;
9253
9254   DEFVAR_BOOL ("coding-system-require-warning",
9255                &coding_system_require_warning,
9256                doc: /* Internal use only.
9257 If non-nil, on writing a file, `select-safe-coding-system-function' is
9258 called even if `coding-system-for-write' is non-nil.  The command
9259 `universal-coding-system-argument' binds this variable to t temporarily.  */);
9260   coding_system_require_warning = 0;
9261
9262
9263   DEFVAR_BOOL ("inhibit-iso-escape-detection",
9264                &inhibit_iso_escape_detection,
9265                doc: /*
9266 If non-nil, Emacs ignores ISO2022's escape sequence on code detection.
9267
9268 By default, on reading a file, Emacs tries to detect how the text is
9269 encoded.  This code detection is sensitive to escape sequences.  If
9270 the sequence is valid as ISO2022, the code is determined as one of
9271 the ISO2022 encodings, and the file is decoded by the corresponding
9272 coding system (e.g. `iso-2022-7bit').
9273
9274 However, there may be a case that you want to read escape sequences in
9275 a file as is.  In such a case, you can set this variable to non-nil.
9276 Then, as the code detection ignores any escape sequences, no file is
9277 detected as encoded in some ISO2022 encoding.  The result is that all
9278 escape sequences become visible in a buffer.
9279
9280 The default value is nil, and it is strongly recommended not to change
9281 it.  That is because many Emacs Lisp source files that contain
9282 non-ASCII characters are encoded by the coding system `iso-2022-7bit'
9283 in Emacs's distribution, and they won't be decoded correctly on
9284 reading if you suppress escape sequence detection.
9285
9286 The other way to read escape sequences in a file without decoding is
9287 to explicitly specify some coding system that doesn't use ISO2022's
9288 escape sequence (e.g `latin-1') on reading by \\[universal-coding-system-argument].  */);
9289   inhibit_iso_escape_detection = 0;
9290
9291   DEFVAR_LISP ("translation-table-for-input", &Vtranslation_table_for_input,
9292                doc: /* Char table for translating self-inserting characters.
9293 This is applied to the result of input methods, not their input.  See also
9294 `keyboard-translate-table'.  */);
9295     Vtranslation_table_for_input = Qnil;
9296
9297   {
9298     Lisp_Object args[coding_arg_max];
9299     Lisp_Object plist[16];
9300     int i;
9301
9302     for (i = 0; i < coding_arg_max; i++)
9303       args[i] = Qnil;
9304
9305     plist[0] = intern (":name");
9306     plist[1] = args[coding_arg_name] = Qno_conversion;
9307     plist[2] = intern (":mnemonic");
9308     plist[3] = args[coding_arg_mnemonic] = make_number ('=');
9309     plist[4] = intern (":coding-type");
9310     plist[5] = args[coding_arg_coding_type] = Qraw_text;
9311     plist[6] = intern (":ascii-compatible-p");
9312     plist[7] = args[coding_arg_ascii_compatible_p] = Qt;
9313     plist[8] = intern (":default-char");
9314     plist[9] = args[coding_arg_default_char] = make_number (0);
9315     plist[10] = intern (":for-unibyte");
9316     plist[11] = args[coding_arg_for_unibyte] = Qt;
9317     plist[12] = intern (":docstring");
9318     plist[13] = build_string ("Do no conversion.\n\
9319 \n\
9320 When you visit a file with this coding, the file is read into a\n\
9321 unibyte buffer as is, thus each byte of a file is treated as a\n\
9322 character.");
9323     plist[14] = intern (":eol-type");
9324     plist[15] = args[coding_arg_eol_type] = Qunix;
9325     args[coding_arg_plist] = Flist (16, plist);
9326     Fdefine_coding_system_internal (coding_arg_max, args);
9327   }
9328
9329   setup_coding_system (Qno_conversion, &keyboard_coding);
9330   setup_coding_system (Qno_conversion, &terminal_coding);
9331   setup_coding_system (Qno_conversion, &safe_terminal_coding);
9332
9333   {
9334     int i;
9335
9336     for (i = 0; i < coding_category_max; i++)
9337       Fset (AREF (Vcoding_category_table, i), Qno_conversion);
9338   }
9339 }
9340
9341 char *
9342 emacs_strerror (error_number)
9343      int error_number;
9344 {
9345   char *str;
9346
9347   synchronize_system_messages_locale ();
9348   str = strerror (error_number);
9349
9350   if (! NILP (Vlocale_coding_system))
9351     {
9352       Lisp_Object dec = code_convert_string_norecord (build_string (str),
9353                                                       Vlocale_coding_system,
9354                                                       0);
9355       str = (char *) SDATA (dec);
9356     }
9357
9358   return str;
9359 }
9360
9361 #endif /* emacs */