src/coding.c

   1 /* Coding system handler (conversion, detection, etc).
   2    Copyright (C) 1995, 1997, 1998 Electrotechnical Laboratory, JAPAN.
   3      Licensed to the Free Software Foundation.
   4    Copyright (C) 2001, 2002 Free Software Foundation, Inc.
   5    Copyright (C) 2003
   6      National Institute of Advanced Industrial Science and Technology (AIST)
   7      Registration Number H13PRO009
   8
   9 This file is part of GNU Emacs.
  10
  11 GNU Emacs is free software; you can redistribute it and/or modify
  12 it under the terms of the GNU General Public License as published by
  13 the Free Software Foundation; either version 2, or (at your option)
  14 any later version.
  15
  16 GNU Emacs is distributed in the hope that it will be useful,
  17 but WITHOUT ANY WARRANTY; without even the implied warranty of
  18 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  19 GNU General Public License for more details.
  20
  21 You should have received a copy of the GNU General Public License
  22 along with GNU Emacs; see the file COPYING.  If not, write to
  23 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
  24 Boston, MA 02111-1307, USA.  */
  25
  26 /*** TABLE OF CONTENTS ***
  27
  28   0. General comments
  29   1. Preamble
  30   2. Emacs' internal format (emacs-utf-8) handlers
  31   3. UTF-8 handlers
  32   4. UTF-16 handlers
  33   5. Charset-base coding systems handlers
  34   6. emacs-mule (old Emacs' internal format) handlers
  35   7. ISO2022 handlers
  36   8. Shift-JIS and BIG5 handlers
  37   9. CCL handlers
  38   10. C library functions
  39   11. Emacs Lisp library functions
  40   12. Postamble
  41
  42 */
  43
  44 /*** 0. General comments ***
  45
  46
  47 CODING SYSTEM
  48
  49   A coding system is an object for an encoding mechanism that contains
  50   information about how to convert byte sequences to character
  51   sequences and vice versa.  When we say "decode", it means converting
  52   a byte sequence of a specific coding system into a character
  53   sequence that is represented by Emacs' internal coding system
  54   `emacs-utf-8', and when we say "encode", it means converting a
  55   character sequence of emacs-utf-8 to a byte sequence of a specific
  56   coding system.
  57
  58   In Emacs Lisp, a coding system is represented by a Lisp symbol.  In
  59   C level, a coding system is represented by a vector of attributes
  60   stored in the hash table Vcharset_hash_table.  The conversion from
  61   coding system symbol to attributes vector is done by looking up
  62   Vcharset_hash_table by the symbol.
  63
  64   Coding systems are classified into the following types depending on
  65   the encoding mechanism.  Here's a brief description of the types.
  66
  67   o UTF-8
  68
  69   o UTF-16
  70
  71   o Charset-base coding system
  72
  73   A coding system defined by one or more (coded) character sets.
  74   Decoding and encoding are done by a code converter defined for each
  75   character set.
  76
  77   o Old Emacs internal format (emacs-mule)
  78
  79   The coding system adopted by old versions of Emacs (20 and 21).
  80
  81   o ISO2022-base coding system
  82
  83   The most famous coding system for multiple character sets.  X's
  84   Compound Text, various EUCs (Extended Unix Code), and coding systems
  85   used in the Internet communication such as ISO-2022-JP are all
  86   variants of ISO2022.
  87
  88   o SJIS (or Shift-JIS or MS-Kanji-Code)
  89
  90   A coding system to encode character sets: ASCII, JISX0201, and
  91   JISX0208.  Widely used for PC's in Japan.  Details are described in
  92   section 8.
  93
  94   o BIG5
  95
  96   A coding system to encode character sets: ASCII and Big5.  Widely
  97   used for Chinese (mainly in Taiwan and Hong Kong).  Details are
  98   described in section 8.  In this file, when we write "big5" (all
  99   lowercase), we mean the coding system, and when we write "Big5"
 100   (capitalized), we mean the character set.
 101
 102   o CCL
 103
 104   If a user wants to decode/encode text encoded in a coding system
 105   not listed above, he can supply a decoder and an encoder for it in
 106   CCL (Code Conversion Language) programs.  Emacs executes the CCL
 107   program while decoding/encoding.
 108
 109   o Raw-text
 110
 111   A coding system for text containing raw eight-bit data.  Emacs
 112   treats each byte of source text as a character (except for
 113   end-of-line conversion).
 114
 115   o No-conversion
 116
 117   Like raw text, but don't do end-of-line conversion.
 118
 119
 120 END-OF-LINE FORMAT
 121
 122   How text end-of-line is encoded depends on operating system.  For
 123   instance, Unix's format is just one byte of LF (line-feed) code,
 124   whereas DOS's format is two-byte sequence of `carriage-return' and
 125   `line-feed' codes.  MacOS's format is usually one byte of
 126   `carriage-return'.
 127
 128   Since text character encoding and end-of-line encoding are
 129   independent, any coding system described above can take any format
 130   of end-of-line (except for no-conversion).
 131
 132 STRUCT CODING_SYSTEM
 133
 134   Before using a coding system for code conversion (i.e. decoding and
 135   encoding), we setup a structure of type `struct coding_system'.
 136   This structure keeps various information about a specific code
 137   conversion (e.g. the location of source and destination data).
 138
 139 */
 140
 141 /* COMMON MACROS */
 142
 143
 144 /*** GENERAL NOTES on `detect_coding_XXX ()' functions ***
 145
 146   These functions check if a byte sequence specified as a source in
 147   CODING conforms to the format of XXX, and update the members of
 148   DETECT_INFO.
 149
 150   Return 1 if the byte sequence conforms to XXX, otherwise return 0.
 151
 152   Below is the template of these functions.  */
 153
 154 #if 0
 155 static int
 156 detect_coding_XXX (coding, detect_info)
 157      struct coding_system *coding;
 158      struct coding_detection_info *detect_info;
 159 {
 160   unsigned char *src = coding->source;
 161   unsigned char *src_end = coding->source + coding->src_bytes;
 162   int multibytep = coding->src_multibyte;
 163   int consumed_chars = 0;
 164   int found = 0;
 165   ...;
 166
 167   while (1)
 168     {
 169       /* Get one byte from the source.  If the souce is exausted, jump
 170          to no_more_source:.  */
 171       ONE_MORE_BYTE (c);
 172
 173       if (! __C_conforms_to_XXX___ (c))
 174         break;
 175       if (! __C_strongly_suggests_XXX__ (c))
 176         found = CATEGORY_MASK_XXX;
 177     }
 178   /* The byte sequence is invalid for XXX.  */
 179   detect_info->rejected |= CATEGORY_MASK_XXX;
 180   return 0;
 181
 182  no_more_source:
 183   /* The source exausted successfully.  */
 184   detect_info->found |= found;
 185   return 1;
 186 }
 187 #endif
 188
 189 /*** GENERAL NOTES on `decode_coding_XXX ()' functions ***
 190
 191   These functions decode a byte sequence specified as a source by
 192   CODING.  The resulting multibyte text goes to a place pointed to by
 193   CODING->charbuf, the length of which should not exceed
 194   CODING->charbuf_size;
 195
 196   These functions set the information of original and decoded texts in
 197   CODING->consumed, CODING->consumed_char, and CODING->charbuf_used.
 198   They also set CODING->result to one of CODING_RESULT_XXX indicating
 199   how the decoding is finished.
 200
 201   Below is the template of these functions.  */
 202
 203 #if 0
 204 static void
 205 decode_coding_XXXX (coding)
 206      struct coding_system *coding;
 207 {
 208   unsigned char *src = coding->source + coding->consumed;
 209   unsigned char *src_end = coding->source + coding->src_bytes;
 210   /* SRC_BASE remembers the start position in source in each loop.
 211      The loop will be exited when there's not enough source code, or
 212      when there's no room in CHARBUF for a decoded character.  */
 213   unsigned char *src_base;
 214   /* A buffer to produce decoded characters.  */
 215   int *charbuf = coding->charbuf;
 216   int *charbuf_end = charbuf + coding->charbuf_size;
 217   int multibytep = coding->src_multibyte;
 218
 219   while (1)
 220     {
 221       src_base = src;
 222       if (charbuf < charbuf_end)
 223         /* No more room to produce a decoded character.  */
 224         break;
 225       ONE_MORE_BYTE (c);
 226       /* Decode it. */
 227     }
 228
 229  no_more_source:
 230   if (src_base < src_end
 231       && coding->mode & CODING_MODE_LAST_BLOCK)
 232     /* If the source ends by partial bytes to construct a character,
 233        treat them as eight-bit raw data.  */
 234     while (src_base < src_end && charbuf < charbuf_end)
 235       *charbuf++ = *src_base++;
 236   /* Remember how many bytes and characters we consumed.  If the
 237      source is multibyte, the bytes and chars are not identical.  */
 238   coding->consumed = coding->consumed_char = src_base - coding->source;
 239   /* Remember how many characters we produced.  */
 240   coding->charbuf_used = charbuf - coding->charbuf;
 241 }
 242 #endif
 243
 244 /*** GENERAL NOTES on `encode_coding_XXX ()' functions ***
 245
 246   These functions encode SRC_BYTES length text at SOURCE of Emacs'
 247   internal multibyte format by CODING.  The resulting byte sequence
 248   goes to a place pointed to by DESTINATION, the length of which
 249   should not exceed DST_BYTES.
 250
 251   These functions set the information of original and encoded texts in
 252   the members produced, produced_char, consumed, and consumed_char of
 253   the structure *CODING.  They also set the member result to one of
 254   CODING_RESULT_XXX indicating how the encoding finished.
 255
 256   DST_BYTES zero means that source area and destination area are
 257   overlapped, which means that we can produce a encoded text until it
 258   reaches at the head of not-yet-encoded source text.
 259
 260   Below is a template of these functions.  */
 261 #if 0
 262 static void
 263 encode_coding_XXX (coding)
 264      struct coding_system *coding;
 265 {
 266   int multibytep = coding->dst_multibyte;
 267   int *charbuf = coding->charbuf;
 268   int *charbuf_end = charbuf->charbuf + coding->charbuf_used;
 269   unsigned char *dst = coding->destination + coding->produced;
 270   unsigned char *dst_end = coding->destination + coding->dst_bytes;
 271   unsigned char *adjusted_dst_end = dst_end - _MAX_BYTES_PRODUCED_IN_LOOP_;
 272   int produced_chars = 0;
 273
 274   for (; charbuf < charbuf_end && dst < adjusted_dst_end; charbuf++)
 275     {
 276       int c = *charbuf;
 277       /* Encode C into DST, and increment DST.  */
 278     }
 279  label_no_more_destination:
 280   /* How many chars and bytes we produced.  */
 281   coding->produced_char += produced_chars;
 282   coding->produced = dst - coding->destination;
 283 }
 284 #endif
 285
 286 \f
 287 /*** 1. Preamble ***/
 288
 289 #include <config.h>
 290 #include <stdio.h>
 291
 292 #include "lisp.h"
 293 #include "buffer.h"
 294 #include "character.h"
 295 #include "charset.h"
 296 #include "ccl.h"
 297 #include "composite.h"
 298 #include "coding.h"
 299 #include "window.h"
 300
 301 Lisp_Object Vcoding_system_hash_table;
 302
 303 Lisp_Object Qcoding_system, Qcoding_aliases, Qeol_type;
 304 Lisp_Object Qunix, Qdos;
 305 extern Lisp_Object Qmac;        /* frame.c */
 306 Lisp_Object Qbuffer_file_coding_system;
 307 Lisp_Object Qpost_read_conversion, Qpre_write_conversion;
 308 Lisp_Object Qdefault_char;
 309 Lisp_Object Qno_conversion, Qundecided;
 310 Lisp_Object Qcharset, Qiso_2022, Qutf_8, Qutf_16, Qshift_jis, Qbig5;
 311 Lisp_Object Qbig, Qlittle;
 312 Lisp_Object Qcoding_system_history;
 313 Lisp_Object Qvalid_codes;
 314 Lisp_Object QCcategory;
 315
 316 extern Lisp_Object Qinsert_file_contents, Qwrite_region;
 317 Lisp_Object Qcall_process, Qcall_process_region, Qprocess_argument;
 318 Lisp_Object Qstart_process, Qopen_network_stream;
 319 Lisp_Object Qtarget_idx;
 320
 321 Lisp_Object Qinsufficient_source, Qinconsistent_eol, Qinvalid_source;
 322 Lisp_Object Qinterrupted, Qinsufficient_memory;
 323
 324 int coding_system_require_warning;
 325
 326 Lisp_Object Vselect_safe_coding_system_function;
 327
 328 /* Mnemonic string for each format of end-of-line.  */
 329 Lisp_Object eol_mnemonic_unix, eol_mnemonic_dos, eol_mnemonic_mac;
 330 /* Mnemonic string to indicate format of end-of-line is not yet
 331    decided.  */
 332 Lisp_Object eol_mnemonic_undecided;
 333
 334 #ifdef emacs
 335
 336 Lisp_Object Vcoding_system_list, Vcoding_system_alist;
 337
 338 Lisp_Object Qcoding_system_p, Qcoding_system_error;
 339
 340 /* Coding system emacs-mule and raw-text are for converting only
 341    end-of-line format.  */
 342 Lisp_Object Qemacs_mule, Qraw_text;
 343 Lisp_Object Qutf_8_emacs;
 344
 345 /* Coding-systems are handed between Emacs Lisp programs and C internal
 346    routines by the following three variables.  */
 347 /* Coding-system for reading files and receiving data from process.  */
 348 Lisp_Object Vcoding_system_for_read;
 349 /* Coding-system for writing files and sending data to process.  */
 350 Lisp_Object Vcoding_system_for_write;
 351 /* Coding-system actually used in the latest I/O.  */
 352 Lisp_Object Vlast_coding_system_used;
 353 /* Set to non-nil when an error is detected while code conversion.  */
 354 Lisp_Object Vlast_code_conversion_error;
 355 /* A vector of length 256 which contains information about special
 356    Latin codes (especially for dealing with Microsoft codes).  */
 357 Lisp_Object Vlatin_extra_code_table;
 358
 359 /* Flag to inhibit code conversion of end-of-line format.  */
 360 int inhibit_eol_conversion;
 361
 362 /* Flag to inhibit ISO2022 escape sequence detection.  */
 363 int inhibit_iso_escape_detection;
 364
 365 /* Flag to make buffer-file-coding-system inherit from process-coding.  */
 366 int inherit_process_coding_system;
 367
 368 /* Coding system to be used to encode text for terminal display.  */
 369 struct coding_system terminal_coding;
 370
 371 /* Coding system to be used to encode text for terminal display when
 372    terminal coding system is nil.  */
 373 struct coding_system safe_terminal_coding;
 374
 375 /* Coding system of what is sent from terminal keyboard.  */
 376 struct coding_system keyboard_coding;
 377
 378 Lisp_Object Vfile_coding_system_alist;
 379 Lisp_Object Vprocess_coding_system_alist;
 380 Lisp_Object Vnetwork_coding_system_alist;
 381
 382 Lisp_Object Vlocale_coding_system;
 383
 384 #endif /* emacs */
 385
 386 /* Flag to tell if we look up translation table on character code
 387    conversion.  */
 388 Lisp_Object Venable_character_translation;
 389 /* Standard translation table to look up on decoding (reading).  */
 390 Lisp_Object Vstandard_translation_table_for_decode;
 391 /* Standard translation table to look up on encoding (writing).  */
 392 Lisp_Object Vstandard_translation_table_for_encode;
 393
 394 Lisp_Object Qtranslation_table;
 395 Lisp_Object Qtranslation_table_id;
 396 Lisp_Object Qtranslation_table_for_decode;
 397 Lisp_Object Qtranslation_table_for_encode;
 398
 399 /* Alist of charsets vs revision number.  */
 400 static Lisp_Object Vcharset_revision_table;
 401
 402 /* Default coding systems used for process I/O.  */
 403 Lisp_Object Vdefault_process_coding_system;
 404
 405 /* Char table for translating Quail and self-inserting input.  */
 406 Lisp_Object Vtranslation_table_for_input;
 407
 408 /* Two special coding systems.  */
 409 Lisp_Object Vsjis_coding_system;
 410 Lisp_Object Vbig5_coding_system;
 411
 412 static void record_conversion_result (struct coding_system *coding,
 413                                       enum coding_result_code result);
 414 static int detect_coding_utf_8 P_ ((struct coding_system *,
 415                                     struct coding_detection_info *info));
 416 static void decode_coding_utf_8 P_ ((struct coding_system *));
 417 static int encode_coding_utf_8 P_ ((struct coding_system *));
 418
 419 static int detect_coding_utf_16 P_ ((struct coding_system *,
 420                                      struct coding_detection_info *info));
 421 static void decode_coding_utf_16 P_ ((struct coding_system *));
 422 static int encode_coding_utf_16 P_ ((struct coding_system *));
 423
 424 static int detect_coding_iso_2022 P_ ((struct coding_system *,
 425                                        struct coding_detection_info *info));
 426 static void decode_coding_iso_2022 P_ ((struct coding_system *));
 427 static int encode_coding_iso_2022 P_ ((struct coding_system *));
 428
 429 static int detect_coding_emacs_mule P_ ((struct coding_system *,
 430                                          struct coding_detection_info *info));
 431 static void decode_coding_emacs_mule P_ ((struct coding_system *));
 432 static int encode_coding_emacs_mule P_ ((struct coding_system *));
 433
 434 static int detect_coding_sjis P_ ((struct coding_system *,
 435                                    struct coding_detection_info *info));
 436 static void decode_coding_sjis P_ ((struct coding_system *));
 437 static int encode_coding_sjis P_ ((struct coding_system *));
 438
 439 static int detect_coding_big5 P_ ((struct coding_system *,
 440                                    struct coding_detection_info *info));
 441 static void decode_coding_big5 P_ ((struct coding_system *));
 442 static int encode_coding_big5 P_ ((struct coding_system *));
 443
 444 static int detect_coding_ccl P_ ((struct coding_system *,
 445                                   struct coding_detection_info *info));
 446 static void decode_coding_ccl P_ ((struct coding_system *));
 447 static int encode_coding_ccl P_ ((struct coding_system *));
 448
 449 static void decode_coding_raw_text P_ ((struct coding_system *));
 450 static int encode_coding_raw_text P_ ((struct coding_system *));
 451
 452
 453 /* ISO2022 section */
 454
 455 #define CODING_ISO_INITIAL(coding, reg)                 \
 456   (XINT (AREF (AREF (CODING_ID_ATTRS ((coding)->id),    \
 457                      coding_attr_iso_initial),          \
 458                reg)))
 459
 460
 461 #define CODING_ISO_REQUEST(coding, charset_id)  \
 462   ((charset_id <= (coding)->max_charset_id      \
 463     ? (coding)->safe_charsets[charset_id]       \
 464     : -1))
 465
 466
 467 #define CODING_ISO_FLAGS(coding)        \
 468   ((coding)->spec.iso_2022.flags)
 469 #define CODING_ISO_DESIGNATION(coding, reg)     \
 470   ((coding)->spec.iso_2022.current_designation[reg])
 471 #define CODING_ISO_INVOCATION(coding, plane)    \
 472   ((coding)->spec.iso_2022.current_invocation[plane])
 473 #define CODING_ISO_SINGLE_SHIFTING(coding)      \
 474   ((coding)->spec.iso_2022.single_shifting)
 475 #define CODING_ISO_BOL(coding)  \
 476   ((coding)->spec.iso_2022.bol)
 477 #define CODING_ISO_INVOKED_CHARSET(coding, plane)       \
 478   CODING_ISO_DESIGNATION ((coding), CODING_ISO_INVOCATION ((coding), (plane)))
 479
 480 /* Control characters of ISO2022.  */
 481                         /* code */      /* function */
 482 #define ISO_CODE_LF     0x0A            /* line-feed */
 483 #define ISO_CODE_CR     0x0D            /* carriage-return */
 484 #define ISO_CODE_SO     0x0E            /* shift-out */
 485 #define ISO_CODE_SI     0x0F            /* shift-in */
 486 #define ISO_CODE_SS2_7  0x19            /* single-shift-2 for 7-bit code */
 487 #define ISO_CODE_ESC    0x1B            /* escape */
 488 #define ISO_CODE_SS2    0x8E            /* single-shift-2 */
 489 #define ISO_CODE_SS3    0x8F            /* single-shift-3 */
 490 #define ISO_CODE_CSI    0x9B            /* control-sequence-introducer */
 491
 492 /* All code (1-byte) of ISO2022 is classified into one of the
 493    followings.  */
 494 enum iso_code_class_type
 495   {
 496     ISO_control_0,              /* Control codes in the range
 497                                    0x00..0x1F and 0x7F, except for the
 498                                    following 5 codes.  */
 499     ISO_shift_out,              /* ISO_CODE_SO (0x0E) */
 500     ISO_shift_in,               /* ISO_CODE_SI (0x0F) */
 501     ISO_single_shift_2_7,       /* ISO_CODE_SS2_7 (0x19) */
 502     ISO_escape,                 /* ISO_CODE_SO (0x1B) */
 503     ISO_control_1,              /* Control codes in the range
 504                                    0x80..0x9F, except for the
 505                                    following 3 codes.  */
 506     ISO_single_shift_2,         /* ISO_CODE_SS2 (0x8E) */
 507     ISO_single_shift_3,         /* ISO_CODE_SS3 (0x8F) */
 508     ISO_control_sequence_introducer, /* ISO_CODE_CSI (0x9B) */
 509     ISO_0x20_or_0x7F,           /* Codes of the values 0x20 or 0x7F.  */
 510     ISO_graphic_plane_0,        /* Graphic codes in the range 0x21..0x7E.  */
 511     ISO_0xA0_or_0xFF,           /* Codes of the values 0xA0 or 0xFF.  */
 512     ISO_graphic_plane_1         /* Graphic codes in the range 0xA1..0xFE.  */
 513   };
 514
 515 /** The macros CODING_ISO_FLAG_XXX defines a flag bit of the
 516     `iso-flags' attribute of an iso2022 coding system.  */
 517
 518 /* If set, produce long-form designation sequence (e.g. ESC $ ( A)
 519    instead of the correct short-form sequence (e.g. ESC $ A).  */
 520 #define CODING_ISO_FLAG_LONG_FORM       0x0001
 521
 522 /* If set, reset graphic planes and registers at end-of-line to the
 523    initial state.  */
 524 #define CODING_ISO_FLAG_RESET_AT_EOL    0x0002
 525
 526 /* If set, reset graphic planes and registers before any control
 527    characters to the initial state.  */
 528 #define CODING_ISO_FLAG_RESET_AT_CNTL   0x0004
 529
 530 /* If set, encode by 7-bit environment.  */
 531 #define CODING_ISO_FLAG_SEVEN_BITS      0x0008
 532
 533 /* If set, use locking-shift function.  */
 534 #define CODING_ISO_FLAG_LOCKING_SHIFT   0x0010
 535
 536 /* If set, use single-shift function.  Overwrite
 537    CODING_ISO_FLAG_LOCKING_SHIFT.  */
 538 #define CODING_ISO_FLAG_SINGLE_SHIFT    0x0020
 539
 540 /* If set, use designation escape sequence.  */
 541 #define CODING_ISO_FLAG_DESIGNATION     0x0040
 542
 543 /* If set, produce revision number sequence.  */
 544 #define CODING_ISO_FLAG_REVISION        0x0080
 545
 546 /* If set, produce ISO6429's direction specifying sequence.  */
 547 #define CODING_ISO_FLAG_DIRECTION       0x0100
 548
 549 /* If set, assume designation states are reset at beginning of line on
 550    output.  */
 551 #define CODING_ISO_FLAG_INIT_AT_BOL     0x0200
 552
 553 /* If set, designation sequence should be placed at beginning of line
 554    on output.  */
 555 #define CODING_ISO_FLAG_DESIGNATE_AT_BOL 0x0400
 556
 557 /* If set, do not encode unsafe charactes on output.  */
 558 #define CODING_ISO_FLAG_SAFE            0x0800
 559
 560 /* If set, extra latin codes (128..159) are accepted as a valid code
 561    on input.  */
 562 #define CODING_ISO_FLAG_LATIN_EXTRA     0x1000
 563
 564 #define CODING_ISO_FLAG_COMPOSITION     0x2000
 565
 566 #define CODING_ISO_FLAG_EUC_TW_SHIFT    0x4000
 567
 568 #define CODING_ISO_FLAG_USE_ROMAN       0x8000
 569
 570 #define CODING_ISO_FLAG_USE_OLDJIS      0x10000
 571
 572 #define CODING_ISO_FLAG_FULL_SUPPORT    0x100000
 573
 574 /* A character to be produced on output if encoding of the original
 575    character is prohibited by CODING_ISO_FLAG_SAFE.  */
 576 #define CODING_INHIBIT_CHARACTER_SUBSTITUTION  '?'
 577
 578
 579 /* UTF-16 section */
 580 #define CODING_UTF_16_BOM(coding)       \
 581   ((coding)->spec.utf_16.bom)
 582
 583 #define CODING_UTF_16_ENDIAN(coding)    \
 584   ((coding)->spec.utf_16.endian)
 585
 586 #define CODING_UTF_16_SURROGATE(coding) \
 587   ((coding)->spec.utf_16.surrogate)
 588
 589
 590 /* CCL section */
 591 #define CODING_CCL_DECODER(coding)      \
 592   AREF (CODING_ID_ATTRS ((coding)->id), coding_attr_ccl_decoder)
 593 #define CODING_CCL_ENCODER(coding)      \
 594   AREF (CODING_ID_ATTRS ((coding)->id), coding_attr_ccl_encoder)
 595 #define CODING_CCL_VALIDS(coding)                                          \
 596   (SDATA (AREF (CODING_ID_ATTRS ((coding)->id), coding_attr_ccl_valids)))
 597
 598 /* Index for each coding category in `coding_categories' */
 599
 600 enum coding_category
 601   {
 602     coding_category_iso_7,
 603     coding_category_iso_7_tight,
 604     coding_category_iso_8_1,
 605     coding_category_iso_8_2,
 606     coding_category_iso_7_else,
 607     coding_category_iso_8_else,
 608     coding_category_utf_8,
 609     coding_category_utf_16_auto,
 610     coding_category_utf_16_be,
 611     coding_category_utf_16_le,
 612     coding_category_utf_16_be_nosig,
 613     coding_category_utf_16_le_nosig,
 614     coding_category_charset,
 615     coding_category_sjis,
 616     coding_category_big5,
 617     coding_category_ccl,
 618     coding_category_emacs_mule,
 619     /* All above are targets of code detection.  */
 620     coding_category_raw_text,
 621     coding_category_undecided,
 622     coding_category_max
 623   };
 624
 625 /* Definitions of flag bits used in detect_coding_XXXX.  */
 626 #define CATEGORY_MASK_ISO_7             (1 << coding_category_iso_7)
 627 #define CATEGORY_MASK_ISO_7_TIGHT       (1 << coding_category_iso_7_tight)
 628 #define CATEGORY_MASK_ISO_8_1           (1 << coding_category_iso_8_1)
 629 #define CATEGORY_MASK_ISO_8_2           (1 << coding_category_iso_8_2)
 630 #define CATEGORY_MASK_ISO_7_ELSE        (1 << coding_category_iso_7_else)
 631 #define CATEGORY_MASK_ISO_8_ELSE        (1 << coding_category_iso_8_else)
 632 #define CATEGORY_MASK_UTF_8             (1 << coding_category_utf_8)
 633 #define CATEGORY_MASK_UTF_16_AUTO       (1 << coding_category_utf_16_auto)
 634 #define CATEGORY_MASK_UTF_16_BE         (1 << coding_category_utf_16_be)
 635 #define CATEGORY_MASK_UTF_16_LE         (1 << coding_category_utf_16_le)
 636 #define CATEGORY_MASK_UTF_16_BE_NOSIG   (1 << coding_category_utf_16_be_nosig)
 637 #define CATEGORY_MASK_UTF_16_LE_NOSIG   (1 << coding_category_utf_16_le_nosig)
 638 #define CATEGORY_MASK_CHARSET           (1 << coding_category_charset)
 639 #define CATEGORY_MASK_SJIS              (1 << coding_category_sjis)
 640 #define CATEGORY_MASK_BIG5              (1 << coding_category_big5)
 641 #define CATEGORY_MASK_CCL               (1 << coding_category_ccl)
 642 #define CATEGORY_MASK_EMACS_MULE        (1 << coding_category_emacs_mule)
 643 #define CATEGORY_MASK_RAW_TEXT          (1 << coding_category_raw_text)
 644
 645 /* This value is returned if detect_coding_mask () find nothing other
 646    than ASCII characters.  */
 647 #define CATEGORY_MASK_ANY               \
 648   (CATEGORY_MASK_ISO_7                  \
 649    | CATEGORY_MASK_ISO_7_TIGHT          \
 650    | CATEGORY_MASK_ISO_8_1              \
 651    | CATEGORY_MASK_ISO_8_2              \
 652    | CATEGORY_MASK_ISO_7_ELSE           \
 653    | CATEGORY_MASK_ISO_8_ELSE           \
 654    | CATEGORY_MASK_UTF_8                \
 655    | CATEGORY_MASK_UTF_16_BE            \
 656    | CATEGORY_MASK_UTF_16_LE            \
 657    | CATEGORY_MASK_UTF_16_BE_NOSIG      \
 658    | CATEGORY_MASK_UTF_16_LE_NOSIG      \
 659    | CATEGORY_MASK_CHARSET              \
 660    | CATEGORY_MASK_SJIS                 \
 661    | CATEGORY_MASK_BIG5                 \
 662    | CATEGORY_MASK_CCL                  \
 663    | CATEGORY_MASK_EMACS_MULE)
 664
 665
 666 #define CATEGORY_MASK_ISO_7BIT \
 667   (CATEGORY_MASK_ISO_7 | CATEGORY_MASK_ISO_7_TIGHT)
 668
 669 #define CATEGORY_MASK_ISO_8BIT \
 670   (CATEGORY_MASK_ISO_8_1 | CATEGORY_MASK_ISO_8_2)
 671
 672 #define CATEGORY_MASK_ISO_ELSE \
 673   (CATEGORY_MASK_ISO_7_ELSE | CATEGORY_MASK_ISO_8_ELSE)
 674
 675 #define CATEGORY_MASK_ISO_ESCAPE        \
 676   (CATEGORY_MASK_ISO_7                  \
 677    | CATEGORY_MASK_ISO_7_TIGHT          \
 678    | CATEGORY_MASK_ISO_7_ELSE           \
 679    | CATEGORY_MASK_ISO_8_ELSE)
 680
 681 #define CATEGORY_MASK_ISO       \
 682   (  CATEGORY_MASK_ISO_7BIT     \
 683      | CATEGORY_MASK_ISO_8BIT   \
 684      | CATEGORY_MASK_ISO_ELSE)
 685
 686 #define CATEGORY_MASK_UTF_16            \
 687   (CATEGORY_MASK_UTF_16_BE              \
 688    | CATEGORY_MASK_UTF_16_LE            \
 689    | CATEGORY_MASK_UTF_16_BE_NOSIG      \
 690    | CATEGORY_MASK_UTF_16_LE_NOSIG)
 691
 692
 693 /* List of symbols `coding-category-xxx' ordered by priority.  This
 694    variable is exposed to Emacs Lisp.  */
 695 static Lisp_Object Vcoding_category_list;
 696
 697 /* Table of coding categories (Lisp symbols).  This variable is for
 698    internal use oly.  */
 699 static Lisp_Object Vcoding_category_table;
 700
 701 /* Table of coding-categories ordered by priority.  */
 702 static enum coding_category coding_priorities[coding_category_max];
 703
 704 /* Nth element is a coding context for the coding system bound to the
 705    Nth coding category.  */
 706 static struct coding_system coding_categories[coding_category_max];
 707
 708 /*** Commonly used macros and functions ***/
 709
 710 #ifndef min
 711 #define min(a, b) ((a) < (b) ? (a) : (b))
 712 #endif
 713 #ifndef max
 714 #define max(a, b) ((a) > (b) ? (a) : (b))
 715 #endif
 716
 717 #define CODING_GET_INFO(coding, attrs, charset_list)    \
 718   do {                                                  \
 719     (attrs) = CODING_ID_ATTRS ((coding)->id);           \
 720     (charset_list) = CODING_ATTR_CHARSET_LIST (attrs);  \
 721   } while (0)
 722
 723
 724 /* Safely get one byte from the source text pointed by SRC which ends
 725    at SRC_END, and set C to that byte.  If there are not enough bytes
 726    in the source, it jumps to `no_more_source'.  If multibytep is
 727    nonzero, and a multibyte character is found at SRC, set C to the
 728    negative value of the character code.  The caller should declare
 729    and set these variables appropriately in advance:
 730         src, src_end, multibytep */
 731
 732 #define ONE_MORE_BYTE(c)                                \
 733   do {                                                  \
 734     if (src == src_end)                                 \
 735       {                                                 \
 736         if (src_base < src)                             \
 737           record_conversion_result                      \
 738             (coding, CODING_RESULT_INSUFFICIENT_SRC);   \
 739         goto no_more_source;                            \
 740       }                                                 \
 741     c = *src++;                                         \
 742     if (multibytep && (c & 0x80))                       \
 743       {                                                 \
 744         if ((c & 0xFE) == 0xC0)                         \
 745           c = ((c & 1) << 6) | *src++;                  \
 746         else                                            \
 747           {                                             \
 748             c = - string_char (--src, &src, NULL);      \
 749             record_conversion_result                    \
 750               (coding, CODING_RESULT_INVALID_SRC);      \
 751           }                                             \
 752       }                                                 \
 753     consumed_chars++;                                   \
 754   } while (0)
 755
 756
 757 #define ONE_MORE_BYTE_NO_CHECK(c)                       \
 758   do {                                                  \
 759     c = *src++;                                         \
 760     if (multibytep && (c & 0x80))                       \
 761       {                                                 \
 762         if ((c & 0xFE) == 0xC0)                         \
 763           c = ((c & 1) << 6) | *src++;                  \
 764         else                                            \
 765           {                                             \
 766             c = - string_char (--src, &src, NULL);      \
 767             record_conversion_result                    \
 768               (coding, CODING_RESULT_INVALID_SRC);      \
 769           }                                             \
 770       }                                                 \
 771     consumed_chars++;                                   \
 772   } while (0)
 773
 774
 775 /* Store a byte C in the place pointed by DST and increment DST to the
 776    next free point, and increment PRODUCED_CHARS.  The caller should
 777    assure that C is 0..127, and declare and set the variable `dst'
 778    appropriately in advance.
 779 */
 780
 781
 782 #define EMIT_ONE_ASCII_BYTE(c)  \
 783   do {                          \
 784     produced_chars++;           \
 785     *dst++ = (c);               \
 786   } while (0)
 787
 788
 789 /* Like EMIT_ONE_ASCII_BYTE byt store two bytes; C1 and C2.  */
 790
 791 #define EMIT_TWO_ASCII_BYTES(c1, c2)    \
 792   do {                                  \
 793     produced_chars += 2;                \
 794     *dst++ = (c1), *dst++ = (c2);       \
 795   } while (0)
 796
 797
 798 /* Store a byte C in the place pointed by DST and increment DST to the
 799    next free point, and increment PRODUCED_CHARS.  If MULTIBYTEP is
 800    nonzero, store in an appropriate multibyte from.  The caller should
 801    declare and set the variables `dst' and `multibytep' appropriately
 802    in advance.  */
 803
 804 #define EMIT_ONE_BYTE(c)                \
 805   do {                                  \
 806     produced_chars++;                   \
 807     if (multibytep)                     \
 808       {                                 \
 809         int ch = (c);                   \
 810         if (ch >= 0x80)                 \
 811           ch = BYTE8_TO_CHAR (ch);      \
 812         CHAR_STRING_ADVANCE (ch, dst);  \
 813       }                                 \
 814     else                                \
 815       *dst++ = (c);                     \
 816   } while (0)
 817
 818
 819 /* Like EMIT_ONE_BYTE, but emit two bytes; C1 and C2.  */
 820
 821 #define EMIT_TWO_BYTES(c1, c2)          \
 822   do {                                  \
 823     produced_chars += 2;                \
 824     if (multibytep)                     \
 825       {                                 \
 826         int ch;                         \
 827                                         \
 828         ch = (c1);                      \
 829         if (ch >= 0x80)                 \
 830           ch = BYTE8_TO_CHAR (ch);      \
 831         CHAR_STRING_ADVANCE (ch, dst);  \
 832         ch = (c2);                      \
 833         if (ch >= 0x80)                 \
 834           ch = BYTE8_TO_CHAR (ch);      \
 835         CHAR_STRING_ADVANCE (ch, dst);  \
 836       }                                 \
 837     else                                \
 838       {                                 \
 839         *dst++ = (c1);                  \
 840         *dst++ = (c2);                  \
 841       }                                 \
 842   } while (0)
 843
 844
 845 #define EMIT_THREE_BYTES(c1, c2, c3)    \
 846   do {                                  \
 847     EMIT_ONE_BYTE (c1);                 \
 848     EMIT_TWO_BYTES (c2, c3);            \
 849   } while (0)
 850
 851
 852 #define EMIT_FOUR_BYTES(c1, c2, c3, c4)         \
 853   do {                                          \
 854     EMIT_TWO_BYTES (c1, c2);                    \
 855     EMIT_TWO_BYTES (c3, c4);                    \
 856   } while (0)
 857
 858
 859 static void
 860 record_conversion_result (struct coding_system *coding,
 861                           enum coding_result_code result)
 862 {
 863   coding->result = result;
 864   switch (result)
 865     {
 866     case CODING_RESULT_INSUFFICIENT_SRC:
 867       Vlast_code_conversion_error = Qinsufficient_source;
 868       break;
 869     case CODING_RESULT_INCONSISTENT_EOL:
 870       Vlast_code_conversion_error = Qinconsistent_eol;
 871       break;
 872     case CODING_RESULT_INVALID_SRC:
 873       Vlast_code_conversion_error = Qinvalid_source;
 874       break;
 875     case CODING_RESULT_INTERRUPT:
 876       Vlast_code_conversion_error = Qinterrupted;
 877       break;
 878     case CODING_RESULT_INSUFFICIENT_MEM:
 879       Vlast_code_conversion_error = Qinsufficient_memory;
 880       break;
 881     }
 882 }
 883
 884 #define CODING_DECODE_CHAR(coding, src, src_base, src_end, charset, code, c) \
 885   do {                                                                       \
 886     charset_map_loaded = 0;                                                  \
 887     c = DECODE_CHAR (charset, code);                                         \
 888     if (charset_map_loaded)                                                  \
 889       {                                                                      \
 890         const unsigned char *orig = coding->source;                          \
 891         EMACS_INT offset;                                                    \
 892                                                                              \
 893         coding_set_source (coding);                                          \
 894         offset = coding->source - orig;                                      \
 895         src += offset;                                                       \
 896         src_base += offset;                                                  \
 897         src_end += offset;                                                   \
 898       }                                                                      \
 899   } while (0)
 900
 901
 902 #define ASSURE_DESTINATION(bytes)                               \
 903   do {                                                          \
 904     if (dst + (bytes) >= dst_end)                               \
 905       {                                                         \
 906         int more_bytes = charbuf_end - charbuf + (bytes);       \
 907                                                                 \
 908         dst = alloc_destination (coding, more_bytes, dst);      \
 909         dst_end = coding->destination + coding->dst_bytes;      \
 910       }                                                         \
 911   } while (0)
 912
 913
 914
 915 static void
 916 coding_set_source (coding)
 917      struct coding_system *coding;
 918 {
 919   if (BUFFERP (coding->src_object))
 920     {
 921       struct buffer *buf = XBUFFER (coding->src_object);
 922
 923       if (coding->src_pos < 0)
 924         coding->source = BUF_GAP_END_ADDR (buf) + coding->src_pos_byte;
 925       else
 926         coding->source = BUF_BYTE_ADDRESS (buf, coding->src_pos_byte);
 927     }
 928   else if (STRINGP (coding->src_object))
 929     {
 930       coding->source = SDATA (coding->src_object) + coding->src_pos_byte;
 931     }
 932   else
 933     /* Otherwise, the source is C string and is never relocated
 934        automatically.  Thus we don't have to update anything.  */
 935     ;
 936 }
 937
 938 static void
 939 coding_set_destination (coding)
 940      struct coding_system *coding;
 941 {
 942   if (BUFFERP (coding->dst_object))
 943     {
 944       if (coding->src_pos < 0)
 945         {
 946           coding->destination = BEG_ADDR + coding->dst_pos_byte - 1;
 947           coding->dst_bytes = (GAP_END_ADDR
 948                                - (coding->src_bytes - coding->consumed)
 949                                - coding->destination);
 950         }
 951       else
 952         {
 953           /* We are sure that coding->dst_pos_byte is before the gap
 954              of the buffer. */
 955           coding->destination = (BUF_BEG_ADDR (XBUFFER (coding->dst_object))
 956                                  + coding->dst_pos_byte - 1);
 957           coding->dst_bytes = (BUF_GAP_END_ADDR (XBUFFER (coding->dst_object))
 958                                - coding->destination);
 959         }
 960     }
 961   else
 962     /* Otherwise, the destination is C string and is never relocated
 963        automatically.  Thus we don't have to update anything.  */
 964     ;
 965 }
 966
 967
 968 static void
 969 coding_alloc_by_realloc (coding, bytes)
 970      struct coding_system *coding;
 971      EMACS_INT bytes;
 972 {
 973   coding->destination = (unsigned char *) xrealloc (coding->destination,
 974                                                     coding->dst_bytes + bytes);
 975   coding->dst_bytes += bytes;
 976 }
 977
 978 static void
 979 coding_alloc_by_making_gap (coding, bytes)
 980      struct coding_system *coding;
 981      EMACS_INT bytes;
 982 {
 983   if (BUFFERP (coding->dst_object)
 984       && EQ (coding->src_object, coding->dst_object))
 985     {
 986       EMACS_INT add = coding->src_bytes - coding->consumed;
 987
 988       GAP_SIZE -= add; ZV += add; Z += add; ZV_BYTE += add; Z_BYTE += add;
 989       make_gap (bytes);
 990       GAP_SIZE += add; ZV -= add; Z -= add; ZV_BYTE -= add; Z_BYTE -= add;
 991     }
 992   else
 993     {
 994       Lisp_Object this_buffer;
 995
 996       this_buffer = Fcurrent_buffer ();
 997       set_buffer_internal (XBUFFER (coding->dst_object));
 998       make_gap (bytes);
 999       set_buffer_internal (XBUFFER (this_buffer));
1000     }
1001 }
1002
1003
1004 static unsigned char *
1005 alloc_destination (coding, nbytes, dst)
1006      struct coding_system *coding;
1007      EMACS_INT nbytes;
1008      unsigned char *dst;
1009 {
1010   EMACS_INT offset = dst - coding->destination;
1011
1012   if (BUFFERP (coding->dst_object))
1013     coding_alloc_by_making_gap (coding, nbytes);
1014   else
1015     coding_alloc_by_realloc (coding, nbytes);
1016   record_conversion_result (coding, CODING_RESULT_SUCCESS);
1017   coding_set_destination (coding);
1018   dst = coding->destination + offset;
1019   return dst;
1020 }
1021
1022 /** Macros for annotations.  */
1023
1024 /* Maximum length of annotation data (sum of annotations for
1025    composition and charset).  */
1026 #define MAX_ANNOTATION_LENGTH (5 + (MAX_COMPOSITION_COMPONENTS * 2) - 1 + 5)
1027
1028 /* An annotation data is stored in the array coding->charbuf in this
1029    format:
1030      [ -LENGTH ANNOTATION_MASK FROM TO ... ]
1031    LENGTH is the number of elements in the annotation.
1032    ANNOTATION_MASK is one of CODING_ANNOTATE_XXX_MASK.
1033    FROM and TO specify the range of text annotated.  They are relative
1034    to coding->src_pos (on encoding) or coding->dst_pos (on decoding).
1035
1036    The format of the following elements depend on ANNOTATION_MASK.
1037
1038    In the case of CODING_ANNOTATE_COMPOSITION_MASK, these elements
1039    follows:
1040      ... METHOD [ COMPOSITION-COMPONENTS ... ]
1041    METHOD is one of enum composition_method.
1042    Optionnal COMPOSITION-COMPONENTS are characters and composition
1043    rules.
1044
1045    In the case of CODING_ANNOTATE_CHARSET_MASK, one element CHARSET-ID
1046    follows.  */
1047
1048 #define ADD_ANNOTATION_DATA(buf, len, mask, from, to)   \
1049   do {                                                  \
1050     *(buf)++ = -(len);                                  \
1051     *(buf)++ = (mask);                                  \
1052     *(buf)++ = (from);                                  \
1053     *(buf)++ = (to);                                    \
1054     coding->annotated = 1;                              \
1055   } while (0);
1056
1057 #define ADD_COMPOSITION_DATA(buf, from, to, method)                           \
1058   do {                                                                        \
1059     ADD_ANNOTATION_DATA (buf, 5, CODING_ANNOTATE_COMPOSITION_MASK, from, to); \
1060     *buf++ = method;                                                          \
1061   } while (0)
1062
1063
1064 #define ADD_CHARSET_DATA(buf, from, to, id)                               \
1065   do {                                                                    \
1066     ADD_ANNOTATION_DATA (buf, 5, CODING_ANNOTATE_CHARSET_MASK, from, to); \
1067     *buf++ = id;                                                          \
1068   } while (0)
1069
1070 \f
1071 /*** 2. Emacs' internal format (emacs-utf-8) ***/
1072
1073
1074
1075 \f
1076 /*** 3. UTF-8 ***/
1077
1078 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
1079    Check if a text is encoded in UTF-8.  If it is, return 1, else
1080    return 0.  */
1081
1082 #define UTF_8_1_OCTET_P(c)         ((c) < 0x80)
1083 #define UTF_8_EXTRA_OCTET_P(c)     (((c) & 0xC0) == 0x80)
1084 #define UTF_8_2_OCTET_LEADING_P(c) (((c) & 0xE0) == 0xC0)
1085 #define UTF_8_3_OCTET_LEADING_P(c) (((c) & 0xF0) == 0xE0)
1086 #define UTF_8_4_OCTET_LEADING_P(c) (((c) & 0xF8) == 0xF0)
1087 #define UTF_8_5_OCTET_LEADING_P(c) (((c) & 0xFC) == 0xF8)
1088
1089 static int
1090 detect_coding_utf_8 (coding, detect_info)
1091      struct coding_system *coding;
1092      struct coding_detection_info *detect_info;
1093 {
1094   const unsigned char *src = coding->source, *src_base;
1095   const unsigned char *src_end = coding->source + coding->src_bytes;
1096   int multibytep = coding->src_multibyte;
1097   int consumed_chars = 0;
1098   int found = 0;
1099
1100   detect_info->checked |= CATEGORY_MASK_UTF_8;
1101   /* A coding system of this category is always ASCII compatible.  */
1102   src += coding->head_ascii;
1103
1104   while (1)
1105     {
1106       int c, c1, c2, c3, c4;
1107
1108       src_base = src;
1109       ONE_MORE_BYTE (c);
1110       if (c < 0 || UTF_8_1_OCTET_P (c))
1111         continue;
1112       ONE_MORE_BYTE (c1);
1113       if (c1 < 0 || ! UTF_8_EXTRA_OCTET_P (c1))
1114         break;
1115       if (UTF_8_2_OCTET_LEADING_P (c))
1116         {
1117           found = CATEGORY_MASK_UTF_8;
1118           continue;
1119         }
1120       ONE_MORE_BYTE (c2);
1121       if (c2 < 0 || ! UTF_8_EXTRA_OCTET_P (c2))
1122         break;
1123       if (UTF_8_3_OCTET_LEADING_P (c))
1124         {
1125           found = CATEGORY_MASK_UTF_8;
1126           continue;
1127         }
1128       ONE_MORE_BYTE (c3);
1129       if (c3 < 0 || ! UTF_8_EXTRA_OCTET_P (c3))
1130         break;
1131       if (UTF_8_4_OCTET_LEADING_P (c))
1132         {
1133           found = CATEGORY_MASK_UTF_8;
1134           continue;
1135         }
1136       ONE_MORE_BYTE (c4);
1137       if (c4 < 0 || ! UTF_8_EXTRA_OCTET_P (c4))
1138         break;
1139       if (UTF_8_5_OCTET_LEADING_P (c))
1140         {
1141           found = CATEGORY_MASK_UTF_8;
1142           continue;
1143         }
1144       break;
1145     }
1146   detect_info->rejected |= CATEGORY_MASK_UTF_8;
1147   return 0;
1148
1149  no_more_source:
1150   if (src_base < src && coding->mode & CODING_MODE_LAST_BLOCK)
1151     {
1152       detect_info->rejected |= CATEGORY_MASK_UTF_8;
1153       return 0;
1154     }
1155   detect_info->found |= found;
1156   return 1;
1157 }
1158
1159
1160 static void
1161 decode_coding_utf_8 (coding)
1162      struct coding_system *coding;
1163 {
1164   const unsigned char *src = coding->source + coding->consumed;
1165   const unsigned char *src_end = coding->source + coding->src_bytes;
1166   const unsigned char *src_base;
1167   int *charbuf = coding->charbuf;
1168   int *charbuf_end = charbuf + coding->charbuf_size;
1169   int consumed_chars = 0, consumed_chars_base;
1170   int multibytep = coding->src_multibyte;
1171   Lisp_Object attr, charset_list;
1172
1173   CODING_GET_INFO (coding, attr, charset_list);
1174
1175   while (1)
1176     {
1177       int c, c1, c2, c3, c4, c5;
1178
1179       src_base = src;
1180       consumed_chars_base = consumed_chars;
1181
1182       if (charbuf >= charbuf_end)
1183         break;
1184
1185       ONE_MORE_BYTE (c1);
1186       if (c1 < 0)
1187         {
1188           c = - c1;
1189         }
1190       else if (UTF_8_1_OCTET_P(c1))
1191         {
1192           c = c1;
1193         }
1194       else
1195         {
1196           ONE_MORE_BYTE (c2);
1197           if (c2 < 0 || ! UTF_8_EXTRA_OCTET_P (c2))
1198             goto invalid_code;
1199           if (UTF_8_2_OCTET_LEADING_P (c1))
1200             {
1201               c = ((c1 & 0x1F) << 6) | (c2 & 0x3F);
1202               /* Reject overlong sequences here and below.  Encoders
1203                  producing them are incorrect, they can be misleading,
1204                  and they mess up read/write invariance.  */
1205               if (c < 128)
1206                 goto invalid_code;
1207             }
1208           else
1209             {
1210               ONE_MORE_BYTE (c3);
1211               if (c3 < 0 || ! UTF_8_EXTRA_OCTET_P (c3))
1212                 goto invalid_code;
1213               if (UTF_8_3_OCTET_LEADING_P (c1))
1214                 {
1215                   c = (((c1 & 0xF) << 12)
1216                        | ((c2 & 0x3F) << 6) | (c3 & 0x3F));
1217                   if (c < 0x800
1218                       || (c >= 0xd800 && c < 0xe000)) /* surrogates (invalid) */
1219                     goto invalid_code;
1220                 }
1221               else
1222                 {
1223                   ONE_MORE_BYTE (c4);
1224                   if (c4 < 0 || ! UTF_8_EXTRA_OCTET_P (c4))
1225                     goto invalid_code;
1226                   if (UTF_8_4_OCTET_LEADING_P (c1))
1227                     {
1228                     c = (((c1 & 0x7) << 18) | ((c2 & 0x3F) << 12)
1229                          | ((c3 & 0x3F) << 6) | (c4 & 0x3F));
1230                     if (c < 0x10000)
1231                       goto invalid_code;
1232                     }
1233                   else
1234                     {
1235                       ONE_MORE_BYTE (c5);
1236                       if (c5 < 0 || ! UTF_8_EXTRA_OCTET_P (c5))
1237                         goto invalid_code;
1238                       if (UTF_8_5_OCTET_LEADING_P (c1))
1239                         {
1240                           c = (((c1 & 0x3) << 24) | ((c2 & 0x3F) << 18)
1241                                | ((c3 & 0x3F) << 12) | ((c4 & 0x3F) << 6)
1242                                | (c5 & 0x3F));
1243                           if ((c > MAX_CHAR) || (c < 0x200000))
1244                             goto invalid_code;
1245                         }
1246                       else
1247                         goto invalid_code;
1248                     }
1249                 }
1250             }
1251         }
1252
1253       *charbuf++ = c;
1254       continue;
1255
1256     invalid_code:
1257       src = src_base;
1258       consumed_chars = consumed_chars_base;
1259       ONE_MORE_BYTE (c);
1260       *charbuf++ = ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
1261       coding->errors++;
1262     }
1263
1264  no_more_source:
1265   coding->consumed_char += consumed_chars_base;
1266   coding->consumed = src_base - coding->source;
1267   coding->charbuf_used = charbuf - coding->charbuf;
1268 }
1269
1270
1271 static int
1272 encode_coding_utf_8 (coding)
1273      struct coding_system *coding;
1274 {
1275   int multibytep = coding->dst_multibyte;
1276   int *charbuf = coding->charbuf;
1277   int *charbuf_end = charbuf + coding->charbuf_used;
1278   unsigned char *dst = coding->destination + coding->produced;
1279   unsigned char *dst_end = coding->destination + coding->dst_bytes;
1280   int produced_chars = 0;
1281   int c;
1282
1283   if (multibytep)
1284     {
1285       int safe_room = MAX_MULTIBYTE_LENGTH * 2;
1286
1287       while (charbuf < charbuf_end)
1288         {
1289           unsigned char str[MAX_MULTIBYTE_LENGTH], *p, *pend = str;
1290
1291           ASSURE_DESTINATION (safe_room);
1292           c = *charbuf++;
1293           if (CHAR_BYTE8_P (c))
1294             {
1295               c = CHAR_TO_BYTE8 (c);
1296               EMIT_ONE_BYTE (c);
1297             }
1298           else
1299             {
1300               CHAR_STRING_ADVANCE (c, pend);
1301               for (p = str; p < pend; p++)
1302                 EMIT_ONE_BYTE (*p);
1303             }
1304         }
1305     }
1306   else
1307     {
1308       int safe_room = MAX_MULTIBYTE_LENGTH;
1309
1310       while (charbuf < charbuf_end)
1311         {
1312           ASSURE_DESTINATION (safe_room);
1313           c = *charbuf++;
1314           dst += CHAR_STRING (c, dst);
1315           produced_chars++;
1316         }
1317     }
1318   record_conversion_result (coding, CODING_RESULT_SUCCESS);
1319   coding->produced_char += produced_chars;
1320   coding->produced = dst - coding->destination;
1321   return 0;
1322 }
1323
1324
1325 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
1326    Check if a text is encoded in one of UTF-16 based coding systems.
1327    If it is, return 1, else return 0.  */
1328
1329 #define UTF_16_HIGH_SURROGATE_P(val) \
1330   (((val) & 0xFC00) == 0xD800)
1331
1332 #define UTF_16_LOW_SURROGATE_P(val) \
1333   (((val) & 0xFC00) == 0xDC00)
1334
1335 #define UTF_16_INVALID_P(val)   \
1336   (((val) == 0xFFFE)            \
1337    || ((val) == 0xFFFF)         \
1338    || UTF_16_LOW_SURROGATE_P (val))
1339
1340
1341 static int
1342 detect_coding_utf_16 (coding, detect_info)
1343      struct coding_system *coding;
1344      struct coding_detection_info *detect_info;
1345 {
1346   const unsigned char *src = coding->source, *src_base = src;
1347   const unsigned char *src_end = coding->source + coding->src_bytes;
1348   int multibytep = coding->src_multibyte;
1349   int consumed_chars = 0;
1350   int c1, c2;
1351
1352   detect_info->checked |= CATEGORY_MASK_UTF_16;
1353   if (coding->mode & CODING_MODE_LAST_BLOCK
1354       && (coding->src_chars & 1))
1355     {
1356       detect_info->rejected |= CATEGORY_MASK_UTF_16;
1357       return 0;
1358     }
1359
1360   ONE_MORE_BYTE (c1);
1361   ONE_MORE_BYTE (c2);
1362   if ((c1 == 0xFF) && (c2 == 0xFE))
1363     {
1364       detect_info->found |= (CATEGORY_MASK_UTF_16_LE
1365                              | CATEGORY_MASK_UTF_16_AUTO);
1366       detect_info->rejected |= (CATEGORY_MASK_UTF_16_BE
1367                                 | CATEGORY_MASK_UTF_16_BE_NOSIG
1368                                 | CATEGORY_MASK_UTF_16_LE_NOSIG);
1369     }
1370   else if ((c1 == 0xFE) && (c2 == 0xFF))
1371     {
1372       detect_info->found |= (CATEGORY_MASK_UTF_16_BE
1373                              | CATEGORY_MASK_UTF_16_AUTO);
1374       detect_info->rejected |= (CATEGORY_MASK_UTF_16_LE
1375                                 | CATEGORY_MASK_UTF_16_BE_NOSIG
1376                                 | CATEGORY_MASK_UTF_16_LE_NOSIG);
1377     }
1378   else if (c1 >= 0 && c2 >= 0)
1379     {
1380       unsigned char b1[256], b2[256];
1381       int b1_variants = 1, b2_variants = 1;
1382       int n;
1383
1384       bzero (b1, 256), bzero (b2, 256);
1385       b1[c1]++, b2[c2]++;
1386       for (n = 0; n < 256 && src < src_end; n++)
1387         {
1388           src_base = src;
1389           ONE_MORE_BYTE (c1);
1390           ONE_MORE_BYTE (c2);
1391           if (c1 < 0 || c2 < 0)
1392             break;
1393           if (! b1[c1++]) b1_variants++;
1394           if (! b2[c2++]) b2_variants++;
1395         }
1396       if (b1_variants < b2_variants)
1397         detect_info->found |= CATEGORY_MASK_UTF_16_BE_NOSIG;
1398       else
1399         detect_info->found |= CATEGORY_MASK_UTF_16_LE_NOSIG;
1400       detect_info->rejected
1401         |= (CATEGORY_MASK_UTF_16_BE | CATEGORY_MASK_UTF_16_LE);
1402     }
1403  no_more_source:
1404   return 1;
1405 }
1406
1407 static void
1408 decode_coding_utf_16 (coding)
1409      struct coding_system *coding;
1410 {
1411   const unsigned char *src = coding->source + coding->consumed;
1412   const unsigned char *src_end = coding->source + coding->src_bytes;
1413   const unsigned char *src_base;
1414   int *charbuf = coding->charbuf;
1415   int *charbuf_end = charbuf + coding->charbuf_size;
1416   int consumed_chars = 0, consumed_chars_base;
1417   int multibytep = coding->src_multibyte;
1418   enum utf_16_bom_type bom = CODING_UTF_16_BOM (coding);
1419   enum utf_16_endian_type endian = CODING_UTF_16_ENDIAN (coding);
1420   int surrogate = CODING_UTF_16_SURROGATE (coding);
1421   Lisp_Object attr, charset_list;
1422
1423   CODING_GET_INFO (coding, attr, charset_list);
1424
1425   if (bom == utf_16_with_bom)
1426     {
1427       int c, c1, c2;
1428
1429       src_base = src;
1430       ONE_MORE_BYTE (c1);
1431       ONE_MORE_BYTE (c2);
1432       c = (c1 << 8) | c2;
1433
1434       if (endian == utf_16_big_endian
1435           ? c != 0xFEFF : c != 0xFFFE)
1436         {
1437           /* The first two bytes are not BOM.  Treat them as bytes
1438              for a normal character.  */
1439           src = src_base;
1440           coding->errors++;
1441         }
1442       CODING_UTF_16_BOM (coding) = utf_16_without_bom;
1443     }
1444   else if (bom == utf_16_detect_bom)
1445     {
1446       /* We have already tried to detect BOM and failed in
1447          detect_coding.  */
1448       CODING_UTF_16_BOM (coding) = utf_16_without_bom;
1449     }
1450
1451   while (1)
1452     {
1453       int c, c1, c2;
1454
1455       src_base = src;
1456       consumed_chars_base = consumed_chars;
1457
1458       if (charbuf + 2 >= charbuf_end)
1459         break;
1460
1461       ONE_MORE_BYTE (c1);
1462       if (c1 < 0)
1463         {
1464           *charbuf++ = -c1;
1465           continue;
1466         }
1467       ONE_MORE_BYTE (c2);
1468       if (c2 < 0)
1469         {
1470           *charbuf++ = ASCII_BYTE_P (c1) ? c1 : BYTE8_TO_CHAR (c1);
1471           *charbuf++ = -c2;
1472           continue;
1473         }
1474       c = (endian == utf_16_big_endian
1475            ? ((c1 << 8) | c2) : ((c2 << 8) | c1));
1476       if (surrogate)
1477         {
1478           if (! UTF_16_LOW_SURROGATE_P (c))
1479             {
1480               if (endian == utf_16_big_endian)
1481                 c1 = surrogate >> 8, c2 = surrogate & 0xFF;
1482               else
1483                 c1 = surrogate & 0xFF, c2 = surrogate >> 8;
1484               *charbuf++ = c1;
1485               *charbuf++ = c2;
1486               coding->errors++;
1487               if (UTF_16_HIGH_SURROGATE_P (c))
1488                 CODING_UTF_16_SURROGATE (coding) = surrogate = c;
1489               else
1490                 *charbuf++ = c;
1491             }
1492           else
1493             {
1494               c = ((surrogate - 0xD800) << 10) | (c - 0xDC00);
1495               CODING_UTF_16_SURROGATE (coding) = surrogate = 0;
1496               *charbuf++ = c;
1497             }
1498         }
1499       else
1500         {
1501           if (UTF_16_HIGH_SURROGATE_P (c))
1502             CODING_UTF_16_SURROGATE (coding) = surrogate = c;
1503           else
1504             *charbuf++ = c;
1505         }
1506     }
1507
1508  no_more_source:
1509   coding->consumed_char += consumed_chars_base;
1510   coding->consumed = src_base - coding->source;
1511   coding->charbuf_used = charbuf - coding->charbuf;
1512 }
1513
1514 static int
1515 encode_coding_utf_16 (coding)
1516      struct coding_system *coding;
1517 {
1518   int multibytep = coding->dst_multibyte;
1519   int *charbuf = coding->charbuf;
1520   int *charbuf_end = charbuf + coding->charbuf_used;
1521   unsigned char *dst = coding->destination + coding->produced;
1522   unsigned char *dst_end = coding->destination + coding->dst_bytes;
1523   int safe_room = 8;
1524   enum utf_16_bom_type bom = CODING_UTF_16_BOM (coding);
1525   int big_endian = CODING_UTF_16_ENDIAN (coding) == utf_16_big_endian;
1526   int produced_chars = 0;
1527   Lisp_Object attrs, charset_list;
1528   int c;
1529
1530   CODING_GET_INFO (coding, attrs, charset_list);
1531
1532   if (bom != utf_16_without_bom)
1533     {
1534       ASSURE_DESTINATION (safe_room);
1535       if (big_endian)
1536         EMIT_TWO_BYTES (0xFE, 0xFF);
1537       else
1538         EMIT_TWO_BYTES (0xFF, 0xFE);
1539       CODING_UTF_16_BOM (coding) = utf_16_without_bom;
1540     }
1541
1542   while (charbuf < charbuf_end)
1543     {
1544       ASSURE_DESTINATION (safe_room);
1545       c = *charbuf++;
1546       if (c >= MAX_UNICODE_CHAR)
1547         c = coding->default_char;
1548
1549       if (c < 0x10000)
1550         {
1551           if (big_endian)
1552             EMIT_TWO_BYTES (c >> 8, c & 0xFF);
1553           else
1554             EMIT_TWO_BYTES (c & 0xFF, c >> 8);
1555         }
1556       else
1557         {
1558           int c1, c2;
1559
1560           c -= 0x10000;
1561           c1 = (c >> 10) + 0xD800;
1562           c2 = (c & 0x3FF) + 0xDC00;
1563           if (big_endian)
1564             EMIT_FOUR_BYTES (c1 >> 8, c1 & 0xFF, c2 >> 8, c2 & 0xFF);
1565           else
1566             EMIT_FOUR_BYTES (c1 & 0xFF, c1 >> 8, c2 & 0xFF, c2 >> 8);
1567         }
1568     }
1569   record_conversion_result (coding, CODING_RESULT_SUCCESS);
1570   coding->produced = dst - coding->destination;
1571   coding->produced_char += produced_chars;
1572   return 0;
1573 }
1574
1575 \f
1576 /*** 6. Old Emacs' internal format (emacs-mule) ***/
1577
1578 /* Emacs' internal format for representation of multiple character
1579    sets is a kind of multi-byte encoding, i.e. characters are
1580    represented by variable-length sequences of one-byte codes.
1581
1582    ASCII characters and control characters (e.g. `tab', `newline') are
1583    represented by one-byte sequences which are their ASCII codes, in
1584    the range 0x00 through 0x7F.
1585
1586    8-bit characters of the range 0x80..0x9F are represented by
1587    two-byte sequences of LEADING_CODE_8_BIT_CONTROL and (their 8-bit
1588    code + 0x20).
1589
1590    8-bit characters of the range 0xA0..0xFF are represented by
1591    one-byte sequences which are their 8-bit code.
1592
1593    The other characters are represented by a sequence of `base
1594    leading-code', optional `extended leading-code', and one or two
1595    `position-code's.  The length of the sequence is determined by the
1596    base leading-code.  Leading-code takes the range 0x81 through 0x9D,
1597    whereas extended leading-code and position-code take the range 0xA0
1598    through 0xFF.  See `charset.h' for more details about leading-code
1599    and position-code.
1600
1601    --- CODE RANGE of Emacs' internal format ---
1602    character set        range
1603    -------------        -----
1604    ascii                0x00..0x7F
1605    eight-bit-control    LEADING_CODE_8_BIT_CONTROL + 0xA0..0xBF
1606    eight-bit-graphic    0xA0..0xBF
1607    ELSE                 0x81..0x9D + [0xA0..0xFF]+
1608    ---------------------------------------------
1609
1610    As this is the internal character representation, the format is
1611    usually not used externally (i.e. in a file or in a data sent to a
1612    process).  But, it is possible to have a text externally in this
1613    format (i.e. by encoding by the coding system `emacs-mule').
1614
1615    In that case, a sequence of one-byte codes has a slightly different
1616    form.
1617
1618    At first, all characters in eight-bit-control are represented by
1619    one-byte sequences which are their 8-bit code.
1620
1621    Next, character composition data are represented by the byte
1622    sequence of the form: 0x80 METHOD BYTES CHARS COMPONENT ...,
1623    where,
1624         METHOD is 0xF0 plus one of composition method (enum
1625         composition_method),
1626
1627         BYTES is 0xA0 plus a byte length of this composition data,
1628
1629         CHARS is 0x20 plus a number of characters composed by this
1630         data,
1631
1632         COMPONENTs are characters of multibye form or composition
1633         rules encoded by two-byte of ASCII codes.
1634
1635    In addition, for backward compatibility, the following formats are
1636    also recognized as composition data on decoding.
1637
1638    0x80 MSEQ ...
1639    0x80 0xFF MSEQ RULE MSEQ RULE ... MSEQ
1640
1641    Here,
1642         MSEQ is a multibyte form but in these special format:
1643           ASCII: 0xA0 ASCII_CODE+0x80,
1644           other: LEADING_CODE+0x20 FOLLOWING-BYTE ...,
1645         RULE is a one byte code of the range 0xA0..0xF0 that
1646         represents a composition rule.
1647   */
1648
1649 char emacs_mule_bytes[256];
1650
1651 int
1652 emacs_mule_char (coding, src, nbytes, nchars, id)
1653      struct coding_system *coding;
1654      const unsigned char *src;
1655      int *nbytes, *nchars, *id;
1656 {
1657   const unsigned char *src_end = coding->source + coding->src_bytes;
1658   const unsigned char *src_base = src;
1659   int multibytep = coding->src_multibyte;
1660   struct charset *charset;
1661   unsigned code;
1662   int c;
1663   int consumed_chars = 0;
1664
1665   ONE_MORE_BYTE (c);
1666   if (c < 0)
1667     {
1668       c = -c;
1669       charset = emacs_mule_charset[0];
1670     }
1671   else
1672     {
1673       switch (emacs_mule_bytes[c])
1674         {
1675         case 2:
1676           if (! (charset = emacs_mule_charset[c]))
1677             goto invalid_code;
1678           ONE_MORE_BYTE (c);
1679           if (c < 0)
1680             goto invalid_code;
1681           code = c & 0x7F;
1682           break;
1683
1684         case 3:
1685           if (c == EMACS_MULE_LEADING_CODE_PRIVATE_11
1686               || c == EMACS_MULE_LEADING_CODE_PRIVATE_12)
1687             {
1688               ONE_MORE_BYTE (c);
1689               if (c < 0 || ! (charset = emacs_mule_charset[c]))
1690                 goto invalid_code;
1691               ONE_MORE_BYTE (c);
1692               if (c < 0)
1693                 goto invalid_code;
1694               code = c & 0x7F;
1695             }
1696           else
1697             {
1698               if (! (charset = emacs_mule_charset[c]))
1699                 goto invalid_code;
1700               ONE_MORE_BYTE (c);
1701               if (c < 0)
1702                 goto invalid_code;
1703               code = (c & 0x7F) << 8;
1704               ONE_MORE_BYTE (c);
1705               if (c < 0)
1706                 goto invalid_code;
1707               code |= c & 0x7F;
1708             }
1709           break;
1710
1711         case 4:
1712           ONE_MORE_BYTE (c);
1713           if (c < 0 || ! (charset = emacs_mule_charset[c]))
1714             goto invalid_code;
1715           ONE_MORE_BYTE (c);
1716           if (c < 0)
1717             goto invalid_code;
1718           code = (c & 0x7F) << 8;
1719           ONE_MORE_BYTE (c);
1720           if (c < 0)
1721             goto invalid_code;
1722           code |= c & 0x7F;
1723           break;
1724
1725         case 1:
1726           code = c;
1727           charset = CHARSET_FROM_ID (ASCII_BYTE_P (code)
1728                                      ? charset_ascii : charset_eight_bit);
1729           break;
1730
1731         default:
1732           abort ();
1733         }
1734       c = DECODE_CHAR (charset, code);
1735       if (c < 0)
1736         goto invalid_code;
1737     }
1738   *nbytes = src - src_base;
1739   *nchars = consumed_chars;
1740   if (id)
1741     *id = charset->id;
1742   return c;
1743
1744  no_more_source:
1745   return -2;
1746
1747  invalid_code:
1748   return -1;
1749 }
1750
1751
1752 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
1753    Check if a text is encoded in `emacs-mule'.  If it is, return 1,
1754    else return 0.  */
1755
1756 static int
1757 detect_coding_emacs_mule (coding, detect_info)
1758      struct coding_system *coding;
1759      struct coding_detection_info *detect_info;
1760 {
1761   const unsigned char *src = coding->source, *src_base;
1762   const unsigned char *src_end = coding->source + coding->src_bytes;
1763   int multibytep = coding->src_multibyte;
1764   int consumed_chars = 0;
1765   int c;
1766   int found = 0;
1767
1768   detect_info->checked |= CATEGORY_MASK_EMACS_MULE;
1769   /* A coding system of this category is always ASCII compatible.  */
1770   src += coding->head_ascii;
1771
1772   while (1)
1773     {
1774       src_base = src;
1775       ONE_MORE_BYTE (c);
1776       if (c < 0)
1777         continue;
1778       if (c == 0x80)
1779         {
1780           /* Perhaps the start of composite character.  We simple skip
1781              it because analyzing it is too heavy for detecting.  But,
1782              at least, we check that the composite character
1783              constitues of more than 4 bytes.  */
1784           const unsigned char *src_base;
1785
1786         repeat:
1787           src_base = src;
1788           do
1789             {
1790               ONE_MORE_BYTE (c);
1791             }
1792           while (c >= 0xA0);
1793
1794           if (src - src_base <= 4)
1795             break;
1796           found = CATEGORY_MASK_EMACS_MULE;
1797           if (c == 0x80)
1798             goto repeat;
1799         }
1800
1801       if (c < 0x80)
1802         {
1803           if (c < 0x20
1804               && (c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO))
1805             break;
1806         }
1807       else
1808         {
1809           const unsigned char *src_base = src - 1;
1810
1811           do
1812             {
1813               ONE_MORE_BYTE (c);
1814             }
1815           while (c >= 0xA0);
1816           if (src - src_base != emacs_mule_bytes[*src_base])
1817             break;
1818           found = CATEGORY_MASK_EMACS_MULE;
1819         }
1820     }
1821   detect_info->rejected |= CATEGORY_MASK_EMACS_MULE;
1822   return 0;
1823
1824  no_more_source:
1825   if (src_base < src && coding->mode & CODING_MODE_LAST_BLOCK)
1826     {
1827       detect_info->rejected |= CATEGORY_MASK_EMACS_MULE;
1828       return 0;
1829     }
1830   detect_info->found |= found;
1831   return 1;
1832 }
1833
1834
1835 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".  */
1836
1837 /* Decode a character represented as a component of composition
1838    sequence of Emacs 20/21 style at SRC.  Set C to that character and
1839    update SRC to the head of next character (or an encoded composition
1840    rule).  If SRC doesn't points a composition component, set C to -1.
1841    If SRC points an invalid byte sequence, global exit by a return
1842    value 0.  */
1843
1844 #define DECODE_EMACS_MULE_COMPOSITION_CHAR(buf)                 \
1845   if (1)                                                        \
1846     {                                                           \
1847       int c;                                                    \
1848       int nbytes, nchars;                                       \
1849                                                                 \
1850       if (src == src_end)                                       \
1851         break;                                                  \
1852       c = emacs_mule_char (coding, src, &nbytes, &nchars, NULL);\
1853       if (c < 0)                                                \
1854         {                                                       \
1855           if (c == -2)                                          \
1856             break;                                              \
1857           goto invalid_code;                                    \
1858         }                                                       \
1859       *buf++ = c;                                               \
1860       src += nbytes;                                            \
1861       consumed_chars += nchars;                                 \
1862     }                                                           \
1863   else
1864
1865
1866 /* Decode a composition rule represented as a component of composition
1867    sequence of Emacs 20 style at SRC.  Store the decoded rule in *BUF,
1868    and increment BUF.  If SRC points an invalid byte sequence, set C
1869    to -1.  */
1870
1871 #define DECODE_EMACS_MULE_COMPOSITION_RULE_20(buf)      \
1872   do {                                                  \
1873     int c, gref, nref;                                  \
1874                                                         \
1875     if (src >= src_end)                                 \
1876       goto invalid_code;                                \
1877     ONE_MORE_BYTE_NO_CHECK (c);                         \
1878     c -= 0x20;                                          \
1879     if (c < 0 || c >= 81)                               \
1880       goto invalid_code;                                \
1881                                                         \
1882     gref = c / 9, nref = c % 9;                         \
1883     *buf++ = COMPOSITION_ENCODE_RULE (gref, nref);      \
1884   } while (0)
1885
1886
1887 /* Decode a composition rule represented as a component of composition
1888    sequence of Emacs 21 style at SRC.  Store the decoded rule in *BUF,
1889    and increment BUF.  If SRC points an invalid byte sequence, set C
1890    to -1.  */
1891
1892 #define DECODE_EMACS_MULE_COMPOSITION_RULE_21(buf)      \
1893   do {                                                  \
1894     int gref, nref;                                     \
1895                                                         \
1896     if (src + 1>= src_end)                              \
1897       goto invalid_code;                                \
1898     ONE_MORE_BYTE_NO_CHECK (gref);                      \
1899     gref -= 0x20;                                       \
1900     ONE_MORE_BYTE_NO_CHECK (nref);                      \
1901     nref -= 0x20;                                       \
1902     if (gref < 0 || gref >= 81                          \
1903         || nref < 0 || nref >= 81)                      \
1904       goto invalid_code;                                \
1905     *buf++ = COMPOSITION_ENCODE_RULE (gref, nref);      \
1906   } while (0)
1907
1908
1909 #define DECODE_EMACS_MULE_21_COMPOSITION(c)                             \
1910   do {                                                                  \
1911     /* Emacs 21 style format.  The first three bytes at SRC are         \
1912        (METHOD - 0xF2), (BYTES - 0xA0), (CHARS - 0xA0), where BYTES is  \
1913        the byte length of this composition information, CHARS is the    \
1914        number of characters composed by this composition.  */           \
1915     enum composition_method method = c - 0xF2;                          \
1916     int *charbuf_base = charbuf;                                        \
1917     int from, to;                                                       \
1918     int consumed_chars_limit;                                           \
1919     int nbytes, nchars;                                                 \
1920                                                                         \
1921     ONE_MORE_BYTE (c);                                                  \
1922     if (c < 0)                                                          \
1923       goto invalid_code;                                                \
1924     nbytes = c - 0xA0;                                                  \
1925     if (nbytes < 3)                                                     \
1926       goto invalid_code;                                                \
1927     ONE_MORE_BYTE (c);                                                  \
1928     if (c < 0)                                                          \
1929       goto invalid_code;                                                \
1930     nchars = c - 0xA0;                                                  \
1931     from = coding->produced + char_offset;                              \
1932     to = from + nchars;                                                 \
1933     ADD_COMPOSITION_DATA (charbuf, from, to, method);                   \
1934     consumed_chars_limit = consumed_chars_base + nbytes;                \
1935     if (method != COMPOSITION_RELATIVE)                                 \
1936       {                                                                 \
1937         int i = 0;                                                      \
1938         while (consumed_chars < consumed_chars_limit)                   \
1939           {                                                             \
1940             if (i % 2 && method != COMPOSITION_WITH_ALTCHARS)           \
1941               DECODE_EMACS_MULE_COMPOSITION_RULE_21 (charbuf);          \
1942             else                                                        \
1943               DECODE_EMACS_MULE_COMPOSITION_CHAR (charbuf);             \
1944             i++;                                                        \
1945           }                                                             \
1946         if (consumed_chars < consumed_chars_limit)                      \
1947           goto invalid_code;                                            \
1948         charbuf_base[0] -= i;                                           \
1949       }                                                                 \
1950   } while (0)
1951
1952
1953 #define DECODE_EMACS_MULE_20_RELATIVE_COMPOSITION(c)            \
1954   do {                                                          \
1955     /* Emacs 20 style format for relative composition.  */      \
1956     /* Store multibyte form of characters to be composed.  */   \
1957     enum composition_method method = COMPOSITION_RELATIVE;      \
1958     int components[MAX_COMPOSITION_COMPONENTS * 2 - 1];         \
1959     int *buf = components;                                      \
1960     int i, j;                                                   \
1961     int from, to;                                               \
1962                                                                 \
1963     src = src_base;                                             \
1964     ONE_MORE_BYTE (c);          /* skip 0x80 */                 \
1965     for (i = 0; i < MAX_COMPOSITION_COMPONENTS; i++)            \
1966       DECODE_EMACS_MULE_COMPOSITION_CHAR (buf);                 \
1967     if (i < 2)                                                  \
1968       goto invalid_code;                                        \
1969     from = coding->produced_char + char_offset;                 \
1970     to = from + i;                                              \
1971     ADD_COMPOSITION_DATA (charbuf, from, to, method);           \
1972     for (j = 0; j < i; j++)                                     \
1973       *charbuf++ = components[j];                               \
1974   } while (0)
1975
1976
1977 #define DECODE_EMACS_MULE_20_RULEBASE_COMPOSITION(c)            \
1978   do {                                                          \
1979     /* Emacs 20 style format for rule-base composition.  */     \
1980     /* Store multibyte form of characters to be composed.  */   \
1981     enum composition_method method = COMPOSITION_WITH_RULE;     \
1982     int components[MAX_COMPOSITION_COMPONENTS * 2 - 1];         \
1983     int *buf = components;                                      \
1984     int i, j;                                                   \
1985     int from, to;                                               \
1986                                                                 \
1987     DECODE_EMACS_MULE_COMPOSITION_CHAR (buf);                   \
1988     for (i = 0; i < MAX_COMPOSITION_COMPONENTS; i++)            \
1989       {                                                         \
1990         DECODE_EMACS_MULE_COMPOSITION_RULE_20 (buf);            \
1991         DECODE_EMACS_MULE_COMPOSITION_CHAR (buf);               \
1992       }                                                         \
1993     if (i < 1 || (buf - components) % 2 == 0)                   \
1994       goto invalid_code;                                        \
1995     if (charbuf + i + (i / 2) + 1 < charbuf_end)                \
1996       goto no_more_source;                                      \
1997     from = coding->produced_char + char_offset;                 \
1998     to = from + i;                                              \
1999     ADD_COMPOSITION_DATA (buf, from, to, method);               \
2000     for (j = 0; j < i; j++)                                     \
2001       *charbuf++ = components[j];                               \
2002     for (j = 0; j < i; j += 2)                                  \
2003       *charbuf++ = components[j];                               \
2004   } while (0)
2005
2006
2007 static void
2008 decode_coding_emacs_mule (coding)
2009      struct coding_system *coding;
2010 {
2011   const unsigned char *src = coding->source + coding->consumed;
2012   const unsigned char *src_end = coding->source + coding->src_bytes;
2013   const unsigned char *src_base;
2014   int *charbuf = coding->charbuf;
2015   int *charbuf_end = charbuf + coding->charbuf_size - MAX_ANNOTATION_LENGTH;
2016   int consumed_chars = 0, consumed_chars_base;
2017   int multibytep = coding->src_multibyte;
2018   Lisp_Object attrs, charset_list;
2019   int char_offset = coding->produced_char;
2020   int last_offset = char_offset;
2021   int last_id = charset_ascii;
2022
2023   CODING_GET_INFO (coding, attrs, charset_list);
2024
2025   while (1)
2026     {
2027       int c;
2028
2029       src_base = src;
2030       consumed_chars_base = consumed_chars;
2031
2032       if (charbuf >= charbuf_end)
2033         break;
2034
2035       ONE_MORE_BYTE (c);
2036       if (c < 0)
2037         {
2038           *charbuf++ = -c;
2039           char_offset++;
2040         }
2041       else if (c < 0x80)
2042         {
2043           *charbuf++ = c;
2044           char_offset++;
2045         }
2046       else if (c == 0x80)
2047         {
2048           ONE_MORE_BYTE (c);
2049           if (c < 0)
2050             goto invalid_code;
2051           if (c - 0xF2 >= COMPOSITION_RELATIVE
2052               && c - 0xF2 <= COMPOSITION_WITH_RULE_ALTCHARS)
2053             DECODE_EMACS_MULE_21_COMPOSITION (c);
2054           else if (c < 0xC0)
2055             DECODE_EMACS_MULE_20_RELATIVE_COMPOSITION (c);
2056           else if (c == 0xFF)
2057             DECODE_EMACS_MULE_20_RULEBASE_COMPOSITION (c);
2058           else
2059             goto invalid_code;
2060         }
2061       else if (c < 0xA0 && emacs_mule_bytes[c] > 1)
2062         {
2063           int nbytes, nchars;
2064           int id;
2065
2066           src = src_base;
2067           consumed_chars = consumed_chars_base;
2068           c = emacs_mule_char (coding, src, &nbytes, &nchars, &id);
2069           if (c < 0)
2070             {
2071               if (c == -2)
2072                 break;
2073               goto invalid_code;
2074             }
2075           if (last_id != id)
2076             {
2077               if (last_id != charset_ascii)
2078                 ADD_CHARSET_DATA (charbuf, last_offset, char_offset, last_id);
2079               last_id = id;
2080               last_offset = char_offset;
2081             }
2082           *charbuf++ = c;
2083           src += nbytes;
2084           consumed_chars += nchars;
2085           char_offset++;
2086         }
2087       continue;
2088
2089     invalid_code:
2090       src = src_base;
2091       consumed_chars = consumed_chars_base;
2092       ONE_MORE_BYTE (c);
2093       *charbuf++ = ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
2094       char_offset++;
2095       coding->errors++;
2096     }
2097
2098  no_more_source:
2099   if (last_id != charset_ascii)
2100     ADD_CHARSET_DATA (charbuf, last_offset, char_offset, last_id);
2101   coding->consumed_char += consumed_chars_base;
2102   coding->consumed = src_base - coding->source;
2103   coding->charbuf_used = charbuf - coding->charbuf;
2104 }
2105
2106
2107 #define EMACS_MULE_LEADING_CODES(id, codes)     \
2108   do {                                          \
2109     if (id < 0xA0)                              \
2110       codes[0] = id, codes[1] = 0;              \
2111     else if (id < 0xE0)                         \
2112       codes[0] = 0x9A, codes[1] = id;           \
2113     else if (id < 0xF0)                         \
2114       codes[0] = 0x9B, codes[1] = id;           \
2115     else if (id < 0xF5)                         \
2116       codes[0] = 0x9C, codes[1] = id;           \
2117     else                                        \
2118       codes[0] = 0x9D, codes[1] = id;           \
2119   } while (0);
2120
2121
2122 static int
2123 encode_coding_emacs_mule (coding)
2124      struct coding_system *coding;
2125 {
2126   int multibytep = coding->dst_multibyte;
2127   int *charbuf = coding->charbuf;
2128   int *charbuf_end = charbuf + coding->charbuf_used;
2129   unsigned char *dst = coding->destination + coding->produced;
2130   unsigned char *dst_end = coding->destination + coding->dst_bytes;
2131   int safe_room = 8;
2132   int produced_chars = 0;
2133   Lisp_Object attrs, charset_list;
2134   int c;
2135   int preferred_charset_id = -1;
2136
2137   CODING_GET_INFO (coding, attrs, charset_list);
2138   if (! EQ (charset_list, Vemacs_mule_charset_list))
2139     {
2140       CODING_ATTR_CHARSET_LIST (attrs)
2141         = charset_list = Vemacs_mule_charset_list;
2142     }
2143
2144   while (charbuf < charbuf_end)
2145     {
2146       ASSURE_DESTINATION (safe_room);
2147       c = *charbuf++;
2148
2149       if (c < 0)
2150         {
2151           /* Handle an annotation.  */
2152           switch (*charbuf)
2153             {
2154             case CODING_ANNOTATE_COMPOSITION_MASK:
2155               /* Not yet implemented.  */
2156               break;
2157             case CODING_ANNOTATE_CHARSET_MASK:
2158               preferred_charset_id = charbuf[3];
2159               if (preferred_charset_id >= 0
2160                   && NILP (Fmemq (make_number (preferred_charset_id),
2161                                   charset_list)))
2162                 preferred_charset_id = -1;
2163               break;
2164             default:
2165               abort ();
2166             }
2167           charbuf += -c - 1;
2168           continue;
2169         }
2170
2171       if (ASCII_CHAR_P (c))
2172         EMIT_ONE_ASCII_BYTE (c);
2173       else if (CHAR_BYTE8_P (c))
2174         {
2175           c = CHAR_TO_BYTE8 (c);
2176           EMIT_ONE_BYTE (c);
2177         }
2178       else
2179         {
2180           struct charset *charset;
2181           unsigned code;
2182           int dimension;
2183           int emacs_mule_id;
2184           unsigned char leading_codes[2];
2185
2186           if (preferred_charset_id >= 0)
2187             {
2188               charset = CHARSET_FROM_ID (preferred_charset_id);
2189               if (! CHAR_CHARSET_P (c, charset))
2190                 charset = char_charset (c, charset_list, NULL);
2191             }
2192           else
2193             charset = char_charset (c, charset_list, &code);
2194           if (! charset)
2195             {
2196               c = coding->default_char;
2197               if (ASCII_CHAR_P (c))
2198                 {
2199                   EMIT_ONE_ASCII_BYTE (c);
2200                   continue;
2201                 }
2202               charset = char_charset (c, charset_list, &code);
2203             }
2204           dimension = CHARSET_DIMENSION (charset);
2205           emacs_mule_id = CHARSET_EMACS_MULE_ID (charset);
2206           EMACS_MULE_LEADING_CODES (emacs_mule_id, leading_codes);
2207           EMIT_ONE_BYTE (leading_codes[0]);
2208           if (leading_codes[1])
2209             EMIT_ONE_BYTE (leading_codes[1]);
2210           if (dimension == 1)
2211             EMIT_ONE_BYTE (code | 0x80);
2212           else
2213             {
2214               code |= 0x8080;
2215               EMIT_ONE_BYTE (code >> 8);
2216               EMIT_ONE_BYTE (code & 0xFF);
2217             }
2218         }
2219     }
2220   record_conversion_result (coding, CODING_RESULT_SUCCESS);
2221   coding->produced_char += produced_chars;
2222   coding->produced = dst - coding->destination;
2223   return 0;
2224 }
2225
2226 \f
2227 /*** 7. ISO2022 handlers ***/
2228
2229 /* The following note describes the coding system ISO2022 briefly.
2230    Since the intention of this note is to help understand the
2231    functions in this file, some parts are NOT ACCURATE or are OVERLY
2232    SIMPLIFIED.  For thorough understanding, please refer to the
2233    original document of ISO2022.  This is equivalent to the standard
2234    ECMA-35, obtainable from <URL:http://www.ecma.ch/> (*).
2235
2236    ISO2022 provides many mechanisms to encode several character sets
2237    in 7-bit and 8-bit environments.  For 7-bit environments, all text
2238    is encoded using bytes less than 128.  This may make the encoded
2239    text a little bit longer, but the text passes more easily through
2240    several types of gateway, some of which strip off the MSB (Most
2241    Significant Bit).
2242
2243    There are two kinds of character sets: control character sets and
2244    graphic character sets.  The former contain control characters such
2245    as `newline' and `escape' to provide control functions (control
2246    functions are also provided by escape sequences).  The latter
2247    contain graphic characters such as 'A' and '-'.  Emacs recognizes
2248    two control character sets and many graphic character sets.
2249
2250    Graphic character sets are classified into one of the following
2251    four classes, according to the number of bytes (DIMENSION) and
2252    number of characters in one dimension (CHARS) of the set:
2253    - DIMENSION1_CHARS94
2254    - DIMENSION1_CHARS96
2255    - DIMENSION2_CHARS94
2256    - DIMENSION2_CHARS96
2257
2258    In addition, each character set is assigned an identification tag,
2259    unique for each set, called the "final character" (denoted as <F>
2260    hereafter).  The <F> of each character set is decided by ECMA(*)
2261    when it is registered in ISO.  The code range of <F> is 0x30..0x7F
2262    (0x30..0x3F are for private use only).
2263
2264    Note (*): ECMA = European Computer Manufacturers Association
2265
2266    Here are examples of graphic character sets [NAME(<F>)]:
2267         o DIMENSION1_CHARS94 -- ASCII('B'), right-half-of-JISX0201('I'), ...
2268         o DIMENSION1_CHARS96 -- right-half-of-ISO8859-1('A'), ...
2269         o DIMENSION2_CHARS94 -- GB2312('A'), JISX0208('B'), ...
2270         o DIMENSION2_CHARS96 -- none for the moment
2271
2272    A code area (1 byte=8 bits) is divided into 4 areas, C0, GL, C1, and GR.
2273         C0 [0x00..0x1F] -- control character plane 0
2274         GL [0x20..0x7F] -- graphic character plane 0
2275         C1 [0x80..0x9F] -- control character plane 1
2276         GR [0xA0..0xFF] -- graphic character plane 1
2277
2278    A control character set is directly designated and invoked to C0 or
2279    C1 by an escape sequence.  The most common case is that:
2280    - ISO646's  control character set is designated/invoked to C0, and
2281    - ISO6429's control character set is designated/invoked to C1,
2282    and usually these designations/invocations are omitted in encoded
2283    text.  In a 7-bit environment, only C0 can be used, and a control
2284    character for C1 is encoded by an appropriate escape sequence to
2285    fit into the environment.  All control characters for C1 are
2286    defined to have corresponding escape sequences.
2287
2288    A graphic character set is at first designated to one of four
2289    graphic registers (G0 through G3), then these graphic registers are
2290    invoked to GL or GR.  These designations and invocations can be
2291    done independently.  The most common case is that G0 is invoked to
2292    GL, G1 is invoked to GR, and ASCII is designated to G0.  Usually
2293    these invocations and designations are omitted in encoded text.
2294    In a 7-bit environment, only GL can be used.
2295
2296    When a graphic character set of CHARS94 is invoked to GL, codes
2297    0x20 and 0x7F of the GL area work as control characters SPACE and
2298    DEL respectively, and codes 0xA0 and 0xFF of the GR area should not
2299    be used.
2300
2301    There are two ways of invocation: locking-shift and single-shift.
2302    With locking-shift, the invocation lasts until the next different
2303    invocation, whereas with single-shift, the invocation affects the
2304    following character only and doesn't affect the locking-shift
2305    state.  Invocations are done by the following control characters or
2306    escape sequences:
2307
2308    ----------------------------------------------------------------------
2309    abbrev  function                  cntrl escape seq   description
2310    ----------------------------------------------------------------------
2311    SI/LS0  (shift-in)                0x0F  none         invoke G0 into GL
2312    SO/LS1  (shift-out)               0x0E  none         invoke G1 into GL
2313    LS2     (locking-shift-2)         none  ESC 'n'      invoke G2 into GL
2314    LS3     (locking-shift-3)         none  ESC 'o'      invoke G3 into GL
2315    LS1R    (locking-shift-1 right)   none  ESC '~'      invoke G1 into GR (*)
2316    LS2R    (locking-shift-2 right)   none  ESC '}'      invoke G2 into GR (*)
2317    LS3R    (locking-shift 3 right)   none  ESC '|'      invoke G3 into GR (*)
2318    SS2     (single-shift-2)          0x8E  ESC 'N'      invoke G2 for one char
2319    SS3     (single-shift-3)          0x8F  ESC 'O'      invoke G3 for one char
2320    ----------------------------------------------------------------------
2321    (*) These are not used by any known coding system.
2322
2323    Control characters for these functions are defined by macros
2324    ISO_CODE_XXX in `coding.h'.
2325
2326    Designations are done by the following escape sequences:
2327    ----------------------------------------------------------------------
2328    escape sequence      description
2329    ----------------------------------------------------------------------
2330    ESC '(' <F>          designate DIMENSION1_CHARS94<F> to G0
2331    ESC ')' <F>          designate DIMENSION1_CHARS94<F> to G1
2332    ESC '*' <F>          designate DIMENSION1_CHARS94<F> to G2
2333    ESC '+' <F>          designate DIMENSION1_CHARS94<F> to G3
2334    ESC ',' <F>          designate DIMENSION1_CHARS96<F> to G0 (*)
2335    ESC '-' <F>          designate DIMENSION1_CHARS96<F> to G1
2336    ESC '.' <F>          designate DIMENSION1_CHARS96<F> to G2
2337    ESC '/' <F>          designate DIMENSION1_CHARS96<F> to G3
2338    ESC '$' '(' <F>      designate DIMENSION2_CHARS94<F> to G0 (**)
2339    ESC '$' ')' <F>      designate DIMENSION2_CHARS94<F> to G1
2340    ESC '$' '*' <F>      designate DIMENSION2_CHARS94<F> to G2
2341    ESC '$' '+' <F>      designate DIMENSION2_CHARS94<F> to G3
2342    ESC '$' ',' <F>      designate DIMENSION2_CHARS96<F> to G0 (*)
2343    ESC '$' '-' <F>      designate DIMENSION2_CHARS96<F> to G1
2344    ESC '$' '.' <F>      designate DIMENSION2_CHARS96<F> to G2
2345    ESC '$' '/' <F>      designate DIMENSION2_CHARS96<F> to G3
2346    ----------------------------------------------------------------------
2347
2348    In this list, "DIMENSION1_CHARS94<F>" means a graphic character set
2349    of dimension 1, chars 94, and final character <F>, etc...
2350
2351    Note (*): Although these designations are not allowed in ISO2022,
2352    Emacs accepts them on decoding, and produces them on encoding
2353    CHARS96 character sets in a coding system which is characterized as
2354    7-bit environment, non-locking-shift, and non-single-shift.
2355
2356    Note (**): If <F> is '@', 'A', or 'B', the intermediate character
2357    '(' must be omitted.  We refer to this as "short-form" hereafter.
2358
2359    Now you may notice that there are a lot of ways of encoding the
2360    same multilingual text in ISO2022.  Actually, there exist many
2361    coding systems such as Compound Text (used in X11's inter client
2362    communication, ISO-2022-JP (used in Japanese Internet), ISO-2022-KR
2363    (used in Korean Internet), EUC (Extended UNIX Code, used in Asian
2364    localized platforms), and all of these are variants of ISO2022.
2365
2366    In addition to the above, Emacs handles two more kinds of escape
2367    sequences: ISO6429's direction specification and Emacs' private
2368    sequence for specifying character composition.
2369
2370    ISO6429's direction specification takes the following form:
2371         o CSI ']'      -- end of the current direction
2372         o CSI '0' ']'  -- end of the current direction
2373         o CSI '1' ']'  -- start of left-to-right text
2374         o CSI '2' ']'  -- start of right-to-left text
2375    The control character CSI (0x9B: control sequence introducer) is
2376    abbreviated to the escape sequence ESC '[' in a 7-bit environment.
2377
2378    Character composition specification takes the following form:
2379         o ESC '0' -- start relative composition
2380         o ESC '1' -- end composition
2381         o ESC '2' -- start rule-base composition (*)
2382         o ESC '3' -- start relative composition with alternate chars  (**)
2383         o ESC '4' -- start rule-base composition with alternate chars  (**)
2384   Since these are not standard escape sequences of any ISO standard,
2385   the use of them with these meanings is restricted to Emacs only.
2386
2387   (*) This form is used only in Emacs 20.7 and older versions,
2388   but newer versions can safely decode it.
2389   (**) This form is used only in Emacs 21.1 and newer versions,
2390   and older versions can't decode it.
2391
2392   Here's a list of example usages of these composition escape
2393   sequences (categorized by `enum composition_method').
2394
2395   COMPOSITION_RELATIVE:
2396         ESC 0 CHAR [ CHAR ] ESC 1
2397   COMPOSITION_WITH_RULE:
2398         ESC 2 CHAR [ RULE CHAR ] ESC 1
2399   COMPOSITION_WITH_ALTCHARS:
2400         ESC 3 ALTCHAR [ ALTCHAR ] ESC 0 CHAR [ CHAR ] ESC 1
2401   COMPOSITION_WITH_RULE_ALTCHARS:
2402         ESC 4 ALTCHAR [ RULE ALTCHAR ] ESC 0 CHAR [ CHAR ] ESC 1 */
2403
2404 enum iso_code_class_type iso_code_class[256];
2405
2406 #define SAFE_CHARSET_P(coding, id)      \
2407   ((id) <= (coding)->max_charset_id     \
2408    && (coding)->safe_charsets[id] >= 0)
2409
2410
2411 #define SHIFT_OUT_OK(category)  \
2412   (CODING_ISO_INITIAL (&coding_categories[category], 1) >= 0)
2413
2414 static void
2415 setup_iso_safe_charsets (attrs)
2416      Lisp_Object attrs;
2417 {
2418   Lisp_Object charset_list, safe_charsets;
2419   Lisp_Object request;
2420   Lisp_Object reg_usage;
2421   Lisp_Object tail;
2422   int reg94, reg96;
2423   int flags = XINT (AREF (attrs, coding_attr_iso_flags));
2424   int max_charset_id;
2425
2426   charset_list = CODING_ATTR_CHARSET_LIST (attrs);
2427   if ((flags & CODING_ISO_FLAG_FULL_SUPPORT)
2428       && ! EQ (charset_list, Viso_2022_charset_list))
2429     {
2430       CODING_ATTR_CHARSET_LIST (attrs)
2431         = charset_list = Viso_2022_charset_list;
2432       ASET (attrs, coding_attr_safe_charsets, Qnil);
2433     }
2434
2435   if (STRINGP (AREF (attrs, coding_attr_safe_charsets)))
2436     return;
2437
2438   max_charset_id = 0;
2439   for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
2440     {
2441       int id = XINT (XCAR (tail));
2442       if (max_charset_id < id)
2443         max_charset_id = id;
2444     }
2445
2446   safe_charsets = Fmake_string (make_number (max_charset_id + 1),
2447                                 make_number (255));
2448   request = AREF (attrs, coding_attr_iso_request);
2449   reg_usage = AREF (attrs, coding_attr_iso_usage);
2450   reg94 = XINT (XCAR (reg_usage));
2451   reg96 = XINT (XCDR (reg_usage));
2452
2453   for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
2454     {
2455       Lisp_Object id;
2456       Lisp_Object reg;
2457       struct charset *charset;
2458
2459       id = XCAR (tail);
2460       charset = CHARSET_FROM_ID (XINT (id));
2461       reg = Fcdr (Fassq (id, request));
2462       if (! NILP (reg))
2463         SSET (safe_charsets, XINT (id), XINT (reg));
2464       else if (charset->iso_chars_96)
2465         {
2466           if (reg96 < 4)
2467             SSET (safe_charsets, XINT (id), reg96);
2468         }
2469       else
2470         {
2471           if (reg94 < 4)
2472             SSET (safe_charsets, XINT (id), reg94);
2473         }
2474     }
2475   ASET (attrs, coding_attr_safe_charsets, safe_charsets);
2476 }
2477
2478
2479 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
2480    Check if a text is encoded in one of ISO-2022 based codig systems.
2481    If it is, return 1, else return 0.  */
2482
2483 static int
2484 detect_coding_iso_2022 (coding, detect_info)
2485      struct coding_system *coding;
2486      struct coding_detection_info *detect_info;
2487 {
2488   const unsigned char *src = coding->source, *src_base = src;
2489   const unsigned char *src_end = coding->source + coding->src_bytes;
2490   int multibytep = coding->src_multibyte;
2491   int single_shifting = 0;
2492   int id;
2493   int c, c1;
2494   int consumed_chars = 0;
2495   int i;
2496   int rejected = 0;
2497   int found = 0;
2498
2499   detect_info->checked |= CATEGORY_MASK_ISO;
2500
2501   for (i = coding_category_iso_7; i <= coding_category_iso_8_else; i++)
2502     {
2503       struct coding_system *this = &(coding_categories[i]);
2504       Lisp_Object attrs, val;
2505
2506       attrs = CODING_ID_ATTRS (this->id);
2507       if (CODING_ISO_FLAGS (this) & CODING_ISO_FLAG_FULL_SUPPORT
2508           && ! EQ (CODING_ATTR_SAFE_CHARSETS (attrs), Viso_2022_charset_list))
2509         setup_iso_safe_charsets (attrs);
2510       val = CODING_ATTR_SAFE_CHARSETS (attrs);
2511       this->max_charset_id = SCHARS (val) - 1;
2512       this->safe_charsets = (char *) SDATA (val);
2513     }
2514
2515   /* A coding system of this category is always ASCII compatible.  */
2516   src += coding->head_ascii;
2517
2518   while (rejected != CATEGORY_MASK_ISO)
2519     {
2520       src_base = src;
2521       ONE_MORE_BYTE (c);
2522       switch (c)
2523         {
2524         case ISO_CODE_ESC:
2525           if (inhibit_iso_escape_detection)
2526             break;
2527           single_shifting = 0;
2528           ONE_MORE_BYTE (c);
2529           if (c >= '(' && c <= '/')
2530             {
2531               /* Designation sequence for a charset of dimension 1.  */
2532               ONE_MORE_BYTE (c1);
2533               if (c1 < ' ' || c1 >= 0x80
2534                   || (id = iso_charset_table[0][c >= ','][c1]) < 0)
2535                 /* Invalid designation sequence.  Just ignore.  */
2536                 break;
2537             }
2538           else if (c == '$')
2539             {
2540               /* Designation sequence for a charset of dimension 2.  */
2541               ONE_MORE_BYTE (c);
2542               if (c >= '@' && c <= 'B')
2543                 /* Designation for JISX0208.1978, GB2312, or JISX0208.  */
2544                 id = iso_charset_table[1][0][c];
2545               else if (c >= '(' && c <= '/')
2546                 {
2547                   ONE_MORE_BYTE (c1);
2548                   if (c1 < ' ' || c1 >= 0x80
2549                       || (id = iso_charset_table[1][c >= ','][c1]) < 0)
2550                     /* Invalid designation sequence.  Just ignore.  */
2551                     break;
2552                 }
2553               else
2554                 /* Invalid designation sequence.  Just ignore it.  */
2555                 break;
2556             }
2557           else if (c == 'N' || c == 'O')
2558             {
2559               /* ESC <Fe> for SS2 or SS3.  */
2560               single_shifting = 1;
2561               rejected |= CATEGORY_MASK_ISO_7BIT | CATEGORY_MASK_ISO_8BIT;
2562               break;
2563             }
2564           else if (c >= '0' && c <= '4')
2565             {
2566               /* ESC <Fp> for start/end composition.  */
2567               found |= CATEGORY_MASK_ISO;
2568               break;
2569             }
2570           else
2571             {
2572               /* Invalid escape sequence.  Just ignore it.  */
2573               break;
2574             }
2575
2576           /* We found a valid designation sequence for CHARSET.  */
2577           rejected |= CATEGORY_MASK_ISO_8BIT;
2578           if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_7],
2579                               id))
2580             found |= CATEGORY_MASK_ISO_7;
2581           else
2582             rejected |= CATEGORY_MASK_ISO_7;
2583           if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_7_tight],
2584                               id))
2585             found |= CATEGORY_MASK_ISO_7_TIGHT;
2586           else
2587             rejected |= CATEGORY_MASK_ISO_7_TIGHT;
2588           if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_7_else],
2589                               id))
2590             found |= CATEGORY_MASK_ISO_7_ELSE;
2591           else
2592             rejected |= CATEGORY_MASK_ISO_7_ELSE;
2593           if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_8_else],
2594                               id))
2595             found |= CATEGORY_MASK_ISO_8_ELSE;
2596           else
2597             rejected |= CATEGORY_MASK_ISO_8_ELSE;
2598           break;
2599
2600         case ISO_CODE_SO:
2601         case ISO_CODE_SI:
2602           /* Locking shift out/in.  */
2603           if (inhibit_iso_escape_detection)
2604             break;
2605           single_shifting = 0;
2606           rejected |= CATEGORY_MASK_ISO_7BIT | CATEGORY_MASK_ISO_8BIT;
2607           found |= CATEGORY_MASK_ISO_ELSE;
2608           break;
2609
2610         case ISO_CODE_CSI:
2611           /* Control sequence introducer.  */
2612           single_shifting = 0;
2613           rejected |= CATEGORY_MASK_ISO_7BIT | CATEGORY_MASK_ISO_7_ELSE;
2614           found |= CATEGORY_MASK_ISO_8_ELSE;
2615           goto check_extra_latin;
2616
2617
2618         case ISO_CODE_SS2:
2619         case ISO_CODE_SS3:
2620           /* Single shift.   */
2621           if (inhibit_iso_escape_detection)
2622             break;
2623           single_shifting = 1;
2624           rejected |= CATEGORY_MASK_ISO_7BIT;
2625           if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_1])
2626               & CODING_ISO_FLAG_SINGLE_SHIFT)
2627             found |= CATEGORY_MASK_ISO_8_1;
2628           if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_2])
2629               & CODING_ISO_FLAG_SINGLE_SHIFT)
2630             found |= CATEGORY_MASK_ISO_8_2;
2631           goto check_extra_latin;
2632
2633         default:
2634           if (c < 0)
2635             continue;
2636           if (c < 0x80)
2637             {
2638               single_shifting = 0;
2639               break;
2640             }
2641           if (c >= 0xA0)
2642             {
2643               rejected |= CATEGORY_MASK_ISO_7BIT | CATEGORY_MASK_ISO_7_ELSE;
2644               found |= CATEGORY_MASK_ISO_8_1;
2645               /* Check the length of succeeding codes of the range
2646                  0xA0..0FF.  If the byte length is even, we include
2647                  CATEGORY_MASK_ISO_8_2 in `found'.  We can check this
2648                  only when we are not single shifting.  */
2649               if (! single_shifting
2650                   && ! (rejected & CATEGORY_MASK_ISO_8_2))
2651                 {
2652                   int i = 1;
2653                   while (src < src_end)
2654                     {
2655                       ONE_MORE_BYTE (c);
2656                       if (c < 0xA0)
2657                         break;
2658                       i++;
2659                     }
2660
2661                   if (i & 1 && src < src_end)
2662                     rejected |= CATEGORY_MASK_ISO_8_2;
2663                   else
2664                     found |= CATEGORY_MASK_ISO_8_2;
2665                 }
2666               break;
2667             }
2668         check_extra_latin:
2669           single_shifting = 0;
2670           if (! VECTORP (Vlatin_extra_code_table)
2671               || NILP (XVECTOR (Vlatin_extra_code_table)->contents[c]))
2672             {
2673               rejected = CATEGORY_MASK_ISO;
2674               break;
2675             }
2676           if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_1])
2677               & CODING_ISO_FLAG_LATIN_EXTRA)
2678             found |= CATEGORY_MASK_ISO_8_1;
2679           else
2680             rejected |= CATEGORY_MASK_ISO_8_1;
2681           if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_2])
2682               & CODING_ISO_FLAG_LATIN_EXTRA)
2683             found |= CATEGORY_MASK_ISO_8_2;
2684           else
2685             rejected |= CATEGORY_MASK_ISO_8_2;
2686         }
2687     }
2688   detect_info->rejected |= CATEGORY_MASK_ISO;
2689   return 0;
2690
2691  no_more_source:
2692   detect_info->rejected |= rejected;
2693   detect_info->found |= (found & ~rejected);
2694   return 1;
2695 }
2696
2697
2698 /* Set designation state into CODING.  */
2699 #define DECODE_DESIGNATION(reg, dim, chars_96, final)                   \
2700   do {                                                                  \
2701     int id, prev;                                                       \
2702                                                                         \
2703     if (final < '0' || final >= 128                                     \
2704         || ((id = ISO_CHARSET_TABLE (dim, chars_96, final)) < 0)        \
2705         || !SAFE_CHARSET_P (coding, id))                                \
2706       {                                                                 \
2707         CODING_ISO_DESIGNATION (coding, reg) = -2;                      \
2708         goto invalid_code;                                              \
2709       }                                                                 \
2710     prev = CODING_ISO_DESIGNATION (coding, reg);                        \
2711     if (id == charset_jisx0201_roman)                                   \
2712       {                                                                 \
2713         if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_USE_ROMAN)      \
2714           id = charset_ascii;                                           \
2715       }                                                                 \
2716     else if (id == charset_jisx0208_1978)                               \
2717       {                                                                 \
2718         if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_USE_OLDJIS)     \
2719           id = charset_jisx0208;                                        \
2720       }                                                                 \
2721     CODING_ISO_DESIGNATION (coding, reg) = id;                          \
2722     /* If there was an invalid designation to REG previously, and this  \
2723        designation is ASCII to REG, we should keep this designation     \
2724        sequence.  */                                                    \
2725     if (prev == -2 && id == charset_ascii)                              \
2726       goto invalid_code;                                                \
2727   } while (0)
2728
2729
2730 #define MAYBE_FINISH_COMPOSITION()                              \
2731   do {                                                          \
2732     int i;                                                      \
2733     if (composition_state == COMPOSING_NO)                      \
2734       break;                                                    \
2735     /* It is assured that we have enough room for producing     \
2736        characters stored in the table `components'.  */         \
2737     if (charbuf + component_idx > charbuf_end)                  \
2738       goto no_more_source;                                      \
2739     composition_state = COMPOSING_NO;                           \
2740     if (method == COMPOSITION_RELATIVE                          \
2741         || method == COMPOSITION_WITH_ALTCHARS)                 \
2742       {                                                         \
2743         for (i = 0; i < component_idx; i++)                     \
2744           *charbuf++ = components[i];                           \
2745         char_offset += component_idx;                           \
2746       }                                                         \
2747     else                                                        \
2748       {                                                         \
2749         for (i = 0; i < component_idx; i += 2)                  \
2750           *charbuf++ = components[i];                           \
2751         char_offset += (component_idx / 2) + 1;                 \
2752       }                                                         \
2753   } while (0)
2754
2755
2756 /* Handle composition start sequence ESC 0, ESC 2, ESC 3, or ESC 4.
2757    ESC 0 : relative composition : ESC 0 CHAR ... ESC 1
2758    ESC 2 : rulebase composition : ESC 2 CHAR RULE CHAR RULE ... CHAR ESC 1
2759    ESC 3 : altchar composition :  ESC 3 CHAR ... ESC 0 CHAR ... ESC 1
2760    ESC 4 : alt&rule composition : ESC 4 CHAR RULE ... CHAR ESC 0 CHAR ... ESC 1
2761   */
2762
2763 #define DECODE_COMPOSITION_START(c1)                                    \
2764   do {                                                                  \
2765     if (c1 == '0'                                                       \
2766         && composition_state == COMPOSING_COMPONENT_RULE)               \
2767       {                                                                 \
2768         component_len = component_idx;                                  \
2769         composition_state = COMPOSING_CHAR;                             \
2770       }                                                                 \
2771     else                                                                \
2772       {                                                                 \
2773         const unsigned char *p;                                         \
2774                                                                         \
2775         MAYBE_FINISH_COMPOSITION ();                                    \
2776         if (charbuf + MAX_COMPOSITION_COMPONENTS > charbuf_end)         \
2777           goto no_more_source;                                          \
2778         for (p = src; p < src_end - 1; p++)                             \
2779           if (*p == ISO_CODE_ESC && p[1] == '1')                        \
2780             break;                                                      \
2781         if (p == src_end - 1)                                           \
2782           {                                                             \
2783             if (coding->mode & CODING_MODE_LAST_BLOCK)                  \
2784               goto invalid_code;                                        \
2785             goto no_more_source;                                        \
2786           }                                                             \
2787                                                                         \
2788         /* This is surely the start of a composition.  */               \
2789         method = (c1 == '0' ? COMPOSITION_RELATIVE                      \
2790                   : c1 == '2' ? COMPOSITION_WITH_RULE                   \
2791                   : c1 == '3' ? COMPOSITION_WITH_ALTCHARS               \
2792                   : COMPOSITION_WITH_RULE_ALTCHARS);                    \
2793         composition_state = (c1 <= '2' ? COMPOSING_CHAR                 \
2794                              : COMPOSING_COMPONENT_CHAR);               \
2795         component_idx = component_len = 0;                              \
2796       }                                                                 \
2797   } while (0)
2798
2799
2800 /* Handle compositoin end sequence ESC 1.  */
2801
2802 #define DECODE_COMPOSITION_END()                                        \
2803   do {                                                                  \
2804     int nchars = (component_len > 0 ? component_idx - component_len     \
2805                   : method == COMPOSITION_RELATIVE ? component_idx      \
2806                   : (component_idx + 1) / 2);                           \
2807     int i;                                                              \
2808     int *saved_charbuf = charbuf;                                       \
2809     int from = char_offset;                                             \
2810     int to = from + nchars;                                             \
2811                                                                         \
2812     ADD_COMPOSITION_DATA (charbuf, from, to, method);                   \
2813     if (method != COMPOSITION_RELATIVE)                                 \
2814       {                                                                 \
2815         if (component_len == 0)                                         \
2816           for (i = 0; i < component_idx; i++)                           \
2817             *charbuf++ = components[i];                                 \
2818         else                                                            \
2819           for (i = 0; i < component_len; i++)                           \
2820             *charbuf++ = components[i];                                 \
2821         *saved_charbuf = saved_charbuf - charbuf;                       \
2822       }                                                                 \
2823     if (method == COMPOSITION_WITH_RULE)                                \
2824       for (i = 0; i < component_idx; i += 2, char_offset++)             \
2825         *charbuf++ = components[i];                                     \
2826     else                                                                \
2827       for (i = component_len; i < component_idx; i++, char_offset++)    \
2828         *charbuf++ = components[i];                                     \
2829     coding->annotated = 1;                                              \
2830     composition_state = COMPOSING_NO;                                   \
2831   } while (0)
2832
2833
2834 /* Decode a composition rule from the byte C1 (and maybe one more byte
2835    from SRC) and store one encoded composition rule in
2836    coding->cmp_data.  */
2837
2838 #define DECODE_COMPOSITION_RULE(c1)                                     \
2839   do {                                                                  \
2840     (c1) -= 32;                                                         \
2841     if (c1 < 81)                /* old format (before ver.21) */        \
2842       {                                                                 \
2843         int gref = (c1) / 9;                                            \
2844         int nref = (c1) % 9;                                            \
2845         if (gref == 4) gref = 10;                                       \
2846         if (nref == 4) nref = 10;                                       \
2847         c1 = COMPOSITION_ENCODE_RULE (gref, nref);                      \
2848       }                                                                 \
2849     else if (c1 < 93)           /* new format (after ver.21) */         \
2850       {                                                                 \
2851         ONE_MORE_BYTE (c2);                                             \
2852         c1 = COMPOSITION_ENCODE_RULE (c1 - 81, c2 - 32);                \
2853       }                                                                 \
2854     else                                                                \
2855       c1 = 0;                                                           \
2856   } while (0)
2857
2858
2859 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".  */
2860
2861 static void
2862 decode_coding_iso_2022 (coding)
2863      struct coding_system *coding;
2864 {
2865   const unsigned char *src = coding->source + coding->consumed;
2866   const unsigned char *src_end = coding->source + coding->src_bytes;
2867   const unsigned char *src_base;
2868   int *charbuf = coding->charbuf;
2869   int *charbuf_end
2870     = charbuf + coding->charbuf_size - 4 - MAX_ANNOTATION_LENGTH;
2871   int consumed_chars = 0, consumed_chars_base;
2872   int multibytep = coding->src_multibyte;
2873   /* Charsets invoked to graphic plane 0 and 1 respectively.  */
2874   int charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
2875   int charset_id_1 = CODING_ISO_INVOKED_CHARSET (coding, 1);
2876   struct charset *charset;
2877   int c;
2878   /* For handling composition sequence.  */
2879 #define COMPOSING_NO                    0
2880 #define COMPOSING_CHAR                  1
2881 #define COMPOSING_RULE                  2
2882 #define COMPOSING_COMPONENT_CHAR        3
2883 #define COMPOSING_COMPONENT_RULE        4
2884
2885   int composition_state = COMPOSING_NO;
2886   enum composition_method method;
2887   int components[MAX_COMPOSITION_COMPONENTS * 2 + 1];
2888   int component_idx;
2889   int component_len;
2890   Lisp_Object attrs, charset_list;
2891   int char_offset = coding->produced_char;
2892   int last_offset = char_offset;
2893   int last_id = charset_ascii;
2894
2895   CODING_GET_INFO (coding, attrs, charset_list);
2896   setup_iso_safe_charsets (attrs);
2897
2898   while (1)
2899     {
2900       int c1, c2;
2901
2902       src_base = src;
2903       consumed_chars_base = consumed_chars;
2904
2905       if (charbuf >= charbuf_end)
2906         break;
2907
2908       ONE_MORE_BYTE (c1);
2909       if (c1 < 0)
2910         goto invalid_code;
2911
2912       /* We produce at most one character.  */
2913       switch (iso_code_class [c1])
2914         {
2915         case ISO_0x20_or_0x7F:
2916           if (composition_state != COMPOSING_NO)
2917             {
2918               if (composition_state == COMPOSING_RULE
2919                   || composition_state == COMPOSING_COMPONENT_RULE)
2920                 {
2921                   DECODE_COMPOSITION_RULE (c1);
2922                   components[component_idx++] = c1;
2923                   composition_state--;
2924                   continue;
2925                 }
2926             }
2927           if (charset_id_0 < 0
2928               || ! CHARSET_ISO_CHARS_96 (CHARSET_FROM_ID (charset_id_0)))
2929             /* This is SPACE or DEL.  */
2930             charset = CHARSET_FROM_ID (charset_ascii);
2931           else
2932             charset = CHARSET_FROM_ID (charset_id_0);
2933           break;
2934
2935         case ISO_graphic_plane_0:
2936           if (composition_state != COMPOSING_NO)
2937             {
2938               if (composition_state == COMPOSING_RULE
2939                   || composition_state == COMPOSING_COMPONENT_RULE)
2940                 {
2941                   DECODE_COMPOSITION_RULE (c1);
2942                   components[component_idx++] = c1;
2943                   composition_state--;
2944                   continue;
2945                 }
2946             }
2947           charset = CHARSET_FROM_ID (charset_id_0);
2948           break;
2949
2950         case ISO_0xA0_or_0xFF:
2951           if (charset_id_1 < 0
2952               || ! CHARSET_ISO_CHARS_96 (CHARSET_FROM_ID (charset_id_1))
2953               || CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS)
2954             goto invalid_code;
2955           /* This is a graphic character, we fall down ... */
2956
2957         case ISO_graphic_plane_1:
2958           if (charset_id_1 < 0)
2959             goto invalid_code;
2960           charset = CHARSET_FROM_ID (charset_id_1);
2961           break;
2962
2963         case ISO_control_0:
2964           MAYBE_FINISH_COMPOSITION ();
2965           charset = CHARSET_FROM_ID (charset_ascii);
2966           break;
2967
2968         case ISO_control_1:
2969           MAYBE_FINISH_COMPOSITION ();
2970           goto invalid_code;
2971
2972         case ISO_shift_out:
2973           if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LOCKING_SHIFT)
2974               || CODING_ISO_DESIGNATION (coding, 1) < 0)
2975             goto invalid_code;
2976           CODING_ISO_INVOCATION (coding, 0) = 1;
2977           charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
2978           continue;
2979
2980         case ISO_shift_in:
2981           if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LOCKING_SHIFT))
2982             goto invalid_code;
2983           CODING_ISO_INVOCATION (coding, 0) = 0;
2984           charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
2985           continue;
2986
2987         case ISO_single_shift_2_7:
2988         case ISO_single_shift_2:
2989           if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT))
2990             goto invalid_code;
2991           /* SS2 is handled as an escape sequence of ESC 'N' */
2992           c1 = 'N';
2993           goto label_escape_sequence;
2994
2995         case ISO_single_shift_3:
2996           if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT))
2997             goto invalid_code;
2998           /* SS2 is handled as an escape sequence of ESC 'O' */
2999           c1 = 'O';
3000           goto label_escape_sequence;
3001
3002         case ISO_control_sequence_introducer:
3003           /* CSI is handled as an escape sequence of ESC '[' ...  */
3004           c1 = '[';
3005           goto label_escape_sequence;
3006
3007         case ISO_escape:
3008           ONE_MORE_BYTE (c1);
3009         label_escape_sequence:
3010           /* Escape sequences handled here are invocation,
3011              designation, direction specification, and character
3012              composition specification.  */
3013           switch (c1)
3014             {
3015             case '&':           /* revision of following character set */
3016               ONE_MORE_BYTE (c1);
3017               if (!(c1 >= '@' && c1 <= '~'))
3018                 goto invalid_code;
3019               ONE_MORE_BYTE (c1);
3020               if (c1 != ISO_CODE_ESC)
3021                 goto invalid_code;
3022               ONE_MORE_BYTE (c1);
3023               goto label_escape_sequence;
3024
3025             case '$':           /* designation of 2-byte character set */
3026               if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DESIGNATION))
3027                 goto invalid_code;
3028               ONE_MORE_BYTE (c1);
3029               if (c1 >= '@' && c1 <= 'B')
3030                 {       /* designation of JISX0208.1978, GB2312.1980,
3031                            or JISX0208.1980 */
3032                   DECODE_DESIGNATION (0, 2, 0, c1);
3033                 }
3034               else if (c1 >= 0x28 && c1 <= 0x2B)
3035                 {       /* designation of DIMENSION2_CHARS94 character set */
3036                   ONE_MORE_BYTE (c2);
3037                   DECODE_DESIGNATION (c1 - 0x28, 2, 0, c2);
3038                 }
3039               else if (c1 >= 0x2C && c1 <= 0x2F)
3040                 {       /* designation of DIMENSION2_CHARS96 character set */
3041                   ONE_MORE_BYTE (c2);
3042                   DECODE_DESIGNATION (c1 - 0x2C, 2, 1, c2);
3043                 }
3044               else
3045                 goto invalid_code;
3046               /* We must update these variables now.  */
3047               charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
3048               charset_id_1 = CODING_ISO_INVOKED_CHARSET (coding, 1);
3049               continue;
3050
3051             case 'n':           /* invocation of locking-shift-2 */
3052               if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LOCKING_SHIFT)
3053                   || CODING_ISO_DESIGNATION (coding, 2) < 0)
3054                 goto invalid_code;
3055               CODING_ISO_INVOCATION (coding, 0) = 2;
3056               charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
3057               continue;
3058
3059             case 'o':           /* invocation of locking-shift-3 */
3060               if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LOCKING_SHIFT)
3061                   || CODING_ISO_DESIGNATION (coding, 3) < 0)
3062                 goto invalid_code;
3063               CODING_ISO_INVOCATION (coding, 0) = 3;
3064               charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
3065               continue;
3066
3067             case 'N':           /* invocation of single-shift-2 */
3068               if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT)
3069                   || CODING_ISO_DESIGNATION (coding, 2) < 0)
3070                 goto invalid_code;
3071               charset = CHARSET_FROM_ID (CODING_ISO_DESIGNATION (coding, 2));
3072               ONE_MORE_BYTE (c1);
3073               if (c1 < 0x20 || (c1 >= 0x80 && c1 < 0xA0))
3074                 goto invalid_code;
3075               break;
3076
3077             case 'O':           /* invocation of single-shift-3 */
3078               if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT)
3079                   || CODING_ISO_DESIGNATION (coding, 3) < 0)
3080                 goto invalid_code;
3081               charset = CHARSET_FROM_ID (CODING_ISO_DESIGNATION (coding, 3));
3082               ONE_MORE_BYTE (c1);
3083               if (c1 < 0x20 || (c1 >= 0x80 && c1 < 0xA0))
3084                 goto invalid_code;
3085               break;
3086
3087             case '0': case '2': case '3': case '4': /* start composition */
3088               if (! (coding->common_flags & CODING_ANNOTATE_COMPOSITION_MASK))
3089                 goto invalid_code;
3090               DECODE_COMPOSITION_START (c1);
3091               continue;
3092
3093             case '1':           /* end composition */
3094               if (composition_state == COMPOSING_NO)
3095                 goto invalid_code;
3096               DECODE_COMPOSITION_END ();
3097               continue;
3098
3099             case '[':           /* specification of direction */
3100               if (! CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DIRECTION)
3101                 goto invalid_code;
3102               /* For the moment, nested direction is not supported.
3103                  So, `coding->mode & CODING_MODE_DIRECTION' zero means
3104                  left-to-right, and nozero means right-to-left.  */
3105               ONE_MORE_BYTE (c1);
3106               switch (c1)
3107                 {
3108                 case ']':       /* end of the current direction */
3109                   coding->mode &= ~CODING_MODE_DIRECTION;
3110
3111                 case '0':       /* end of the current direction */
3112                 case '1':       /* start of left-to-right direction */
3113                   ONE_MORE_BYTE (c1);
3114                   if (c1 == ']')
3115                     coding->mode &= ~CODING_MODE_DIRECTION;
3116                   else
3117                     goto invalid_code;
3118                   break;
3119
3120                 case '2':       /* start of right-to-left direction */
3121                   ONE_MORE_BYTE (c1);
3122                   if (c1 == ']')
3123                     coding->mode |= CODING_MODE_DIRECTION;
3124                   else
3125                     goto invalid_code;
3126                   break;
3127
3128                 default:
3129                   goto invalid_code;
3130                 }
3131               continue;
3132
3133             case '%':
3134               ONE_MORE_BYTE (c1);
3135               if (c1 == '/')
3136                 {
3137                   /* CTEXT extended segment:
3138                      ESC % / [0-4] M L --ENCODING-NAME-- \002 --BYTES--
3139                      We keep these bytes as is for the moment.
3140                      They may be decoded by post-read-conversion.  */
3141                   int dim, M, L;
3142                   int size;
3143
3144                   ONE_MORE_BYTE (dim);
3145                   ONE_MORE_BYTE (M);
3146                   ONE_MORE_BYTE (L);
3147                   size = ((M - 128) * 128) + (L - 128);
3148                   if (charbuf + 8 + size > charbuf_end)
3149                     goto break_loop;
3150                   *charbuf++ = ISO_CODE_ESC;
3151                   *charbuf++ = '%';
3152                   *charbuf++ = '/';
3153                   *charbuf++ = dim;
3154                   *charbuf++ = BYTE8_TO_CHAR (M);
3155                   *charbuf++ = BYTE8_TO_CHAR (L);
3156                   while (size-- > 0)
3157                     {
3158                       ONE_MORE_BYTE (c1);
3159                       *charbuf++ = ASCII_BYTE_P (c1) ? c1 : BYTE8_TO_CHAR (c1);
3160                     }
3161                 }
3162               else if (c1 == 'G')
3163                 {
3164                   /* XFree86 extension for embedding UTF-8 in CTEXT:
3165                      ESC % G --UTF-8-BYTES-- ESC % @
3166                      We keep these bytes as is for the moment.
3167                      They may be decoded by post-read-conversion.  */
3168                   int *p = charbuf;
3169
3170                   if (p + 6 > charbuf_end)
3171                     goto break_loop;
3172                   *p++ = ISO_CODE_ESC;
3173                   *p++ = '%';
3174                   *p++ = 'G';
3175                   while (p < charbuf_end)
3176                     {
3177                       ONE_MORE_BYTE (c1);
3178                       if (c1 == ISO_CODE_ESC
3179                           && src + 1 < src_end
3180                           && src[0] == '%'
3181                           && src[1] == '@')
3182                         break;
3183                       *p++ = ASCII_BYTE_P (c1) ? c1 : BYTE8_TO_CHAR (c1);
3184                     }
3185                   if (p + 3 > charbuf_end)
3186                     goto break_loop;
3187                   *p++ = ISO_CODE_ESC;
3188                   *p++ = '%';
3189                   *p++ = '@';
3190                   charbuf = p;
3191                 }
3192               else
3193                 goto invalid_code;
3194               continue;
3195               break;
3196
3197             default:
3198               if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DESIGNATION))
3199                 goto invalid_code;
3200               if (c1 >= 0x28 && c1 <= 0x2B)
3201                 {       /* designation of DIMENSION1_CHARS94 character set */
3202                   ONE_MORE_BYTE (c2);
3203                   DECODE_DESIGNATION (c1 - 0x28, 1, 0, c2);
3204                 }
3205               else if (c1 >= 0x2C && c1 <= 0x2F)
3206                 {       /* designation of DIMENSION1_CHARS96 character set */
3207                   ONE_MORE_BYTE (c2);
3208                   DECODE_DESIGNATION (c1 - 0x2C, 1, 1, c2);
3209                 }
3210               else
3211                 goto invalid_code;
3212               /* We must update these variables now.  */
3213               charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
3214               charset_id_1 = CODING_ISO_INVOKED_CHARSET (coding, 1);
3215               continue;
3216             }
3217         }
3218
3219       if (charset->id != charset_ascii
3220           && last_id != charset->id)
3221         {
3222           if (last_id != charset_ascii)
3223             ADD_CHARSET_DATA (charbuf, last_offset, char_offset, last_id);
3224           last_id = charset->id;
3225           last_offset = char_offset;
3226         }
3227
3228       /* Now we know CHARSET and 1st position code C1 of a character.
3229          Produce a decoded character while getting 2nd position code
3230          C2 if necessary.  */
3231       c1 &= 0x7F;
3232       if (CHARSET_DIMENSION (charset) > 1)
3233         {
3234           ONE_MORE_BYTE (c2);
3235           if (c2 < 0x20 || (c2 >= 0x80 && c2 < 0xA0))
3236             /* C2 is not in a valid range.  */
3237             goto invalid_code;
3238           c1 = (c1 << 8) | (c2 & 0x7F);
3239           if (CHARSET_DIMENSION (charset) > 2)
3240             {
3241               ONE_MORE_BYTE (c2);
3242               if (c2 < 0x20 || (c2 >= 0x80 && c2 < 0xA0))
3243                 /* C2 is not in a valid range.  */
3244                 goto invalid_code;
3245               c1 = (c1 << 8) | (c2 & 0x7F);
3246             }
3247         }
3248
3249       CODING_DECODE_CHAR (coding, src, src_base, src_end, charset, c1, c);
3250       if (c < 0)
3251         {
3252           MAYBE_FINISH_COMPOSITION ();
3253           for (; src_base < src; src_base++, char_offset++)
3254             {
3255               if (ASCII_BYTE_P (*src_base))
3256                 *charbuf++ = *src_base;
3257               else
3258                 *charbuf++ = BYTE8_TO_CHAR (*src_base);
3259             }
3260         }
3261       else if (composition_state == COMPOSING_NO)
3262         {
3263           *charbuf++ = c;
3264           char_offset++;
3265         }
3266       else
3267         {
3268           components[component_idx++] = c;
3269           if (method == COMPOSITION_WITH_RULE
3270               || (method == COMPOSITION_WITH_RULE_ALTCHARS
3271                   && composition_state == COMPOSING_COMPONENT_CHAR))
3272             composition_state++;
3273         }
3274       continue;
3275
3276     invalid_code:
3277       MAYBE_FINISH_COMPOSITION ();
3278       src = src_base;
3279       consumed_chars = consumed_chars_base;
3280       ONE_MORE_BYTE (c);
3281       *charbuf++ = c < 0 ? -c : ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
3282       char_offset++;
3283       coding->errors++;
3284       continue;
3285
3286     break_loop:
3287       break;
3288     }
3289
3290  no_more_source:
3291   if (last_id != charset_ascii)
3292     ADD_CHARSET_DATA (charbuf, last_offset, char_offset, last_id);
3293   coding->consumed_char += consumed_chars_base;
3294   coding->consumed = src_base - coding->source;
3295   coding->charbuf_used = charbuf - coding->charbuf;
3296 }
3297
3298
3299 /* ISO2022 encoding stuff.  */
3300
3301 /*
3302    It is not enough to say just "ISO2022" on encoding, we have to
3303    specify more details.  In Emacs, each coding system of ISO2022
3304    variant has the following specifications:
3305         1. Initial designation to G0 thru G3.
3306         2. Allows short-form designation?
3307         3. ASCII should be designated to G0 before control characters?
3308         4. ASCII should be designated to G0 at end of line?
3309         5. 7-bit environment or 8-bit environment?
3310         6. Use locking-shift?
3311         7. Use Single-shift?
3312    And the following two are only for Japanese:
3313         8. Use ASCII in place of JIS0201-1976-Roman?
3314         9. Use JISX0208-1983 in place of JISX0208-1978?
3315    These specifications are encoded in CODING_ISO_FLAGS (coding) as flag bits
3316    defined by macros CODING_ISO_FLAG_XXX.  See `coding.h' for more
3317    details.
3318 */
3319
3320 /* Produce codes (escape sequence) for designating CHARSET to graphic
3321    register REG at DST, and increment DST.  If <final-char> of CHARSET is
3322    '@', 'A', or 'B' and the coding system CODING allows, produce
3323    designation sequence of short-form.  */
3324
3325 #define ENCODE_DESIGNATION(charset, reg, coding)                        \
3326   do {                                                                  \
3327     unsigned char final_char = CHARSET_ISO_FINAL (charset);             \
3328     char *intermediate_char_94 = "()*+";                                \
3329     char *intermediate_char_96 = ",-./";                                \
3330     int revision = -1;                                                  \
3331     int c;                                                              \
3332                                                                         \
3333     if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_REVISION)           \
3334       revision = CHARSET_ISO_REVISION (charset);                        \
3335                                                                         \
3336     if (revision >= 0)                                                  \
3337       {                                                                 \
3338         EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, '&');                       \
3339         EMIT_ONE_BYTE ('@' + revision);                                 \
3340       }                                                                 \
3341     EMIT_ONE_ASCII_BYTE (ISO_CODE_ESC);                                 \
3342     if (CHARSET_DIMENSION (charset) == 1)                               \
3343       {                                                                 \
3344         if (! CHARSET_ISO_CHARS_96 (charset))                           \
3345           c = intermediate_char_94[reg];                                \
3346         else                                                            \
3347           c = intermediate_char_96[reg];                                \
3348         EMIT_ONE_ASCII_BYTE (c);                                        \
3349       }                                                                 \
3350     else                                                                \
3351       {                                                                 \
3352         EMIT_ONE_ASCII_BYTE ('$');                                      \
3353         if (! CHARSET_ISO_CHARS_96 (charset))                           \
3354           {                                                             \
3355             if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LONG_FORM   \
3356                 || reg != 0                                             \
3357                 || final_char < '@' || final_char > 'B')                \
3358               EMIT_ONE_ASCII_BYTE (intermediate_char_94[reg]);          \
3359           }                                                             \
3360         else                                                            \
3361           EMIT_ONE_ASCII_BYTE (intermediate_char_96[reg]);              \
3362       }                                                                 \
3363     EMIT_ONE_ASCII_BYTE (final_char);                                   \
3364                                                                         \
3365     CODING_ISO_DESIGNATION (coding, reg) = CHARSET_ID (charset);        \
3366   } while (0)
3367
3368
3369 /* The following two macros produce codes (control character or escape
3370    sequence) for ISO2022 single-shift functions (single-shift-2 and
3371    single-shift-3).  */
3372
3373 #define ENCODE_SINGLE_SHIFT_2                                           \
3374   do {                                                                  \
3375     if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS)         \
3376       EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'N');                         \
3377     else                                                                \
3378       EMIT_ONE_BYTE (ISO_CODE_SS2);                                     \
3379     CODING_ISO_SINGLE_SHIFTING (coding) = 1;                            \
3380   } while (0)
3381
3382
3383 #define ENCODE_SINGLE_SHIFT_3                                           \
3384   do {                                                                  \
3385     if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS)         \
3386       EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'O');                         \
3387     else                                                                \
3388       EMIT_ONE_BYTE (ISO_CODE_SS3);                                     \
3389     CODING_ISO_SINGLE_SHIFTING (coding) = 1;                            \
3390   } while (0)
3391
3392
3393 /* The following four macros produce codes (control character or
3394    escape sequence) for ISO2022 locking-shift functions (shift-in,
3395    shift-out, locking-shift-2, and locking-shift-3).  */
3396
3397 #define ENCODE_SHIFT_IN                                 \
3398   do {                                                  \
3399     EMIT_ONE_ASCII_BYTE (ISO_CODE_SI);                  \
3400     CODING_ISO_INVOCATION (coding, 0) = 0;              \
3401   } while (0)
3402
3403
3404 #define ENCODE_SHIFT_OUT                                \
3405   do {                                                  \
3406     EMIT_ONE_ASCII_BYTE (ISO_CODE_SO);                  \
3407     CODING_ISO_INVOCATION (coding, 0) = 1;              \
3408   } while (0)
3409
3410
3411 #define ENCODE_LOCKING_SHIFT_2                          \
3412   do {                                                  \
3413     EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'n');           \
3414     CODING_ISO_INVOCATION (coding, 0) = 2;              \
3415   } while (0)
3416
3417
3418 #define ENCODE_LOCKING_SHIFT_3                          \
3419   do {                                                  \
3420     EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'n');           \
3421     CODING_ISO_INVOCATION (coding, 0) = 3;              \
3422   } while (0)
3423
3424
3425 /* Produce codes for a DIMENSION1 character whose character set is
3426    CHARSET and whose position-code is C1.  Designation and invocation
3427    sequences are also produced in advance if necessary.  */
3428
3429 #define ENCODE_ISO_CHARACTER_DIMENSION1(charset, c1)                    \
3430   do {                                                                  \
3431     int id = CHARSET_ID (charset);                                      \
3432                                                                         \
3433     if ((CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_USE_ROMAN)         \
3434         && id == charset_ascii)                                         \
3435       {                                                                 \
3436         id = charset_jisx0201_roman;                                    \
3437         charset = CHARSET_FROM_ID (id);                                 \
3438       }                                                                 \
3439                                                                         \
3440     if (CODING_ISO_SINGLE_SHIFTING (coding))                            \
3441       {                                                                 \
3442         if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS)     \
3443           EMIT_ONE_ASCII_BYTE (c1 & 0x7F);                              \
3444         else                                                            \
3445           EMIT_ONE_BYTE (c1 | 0x80);                                    \
3446         CODING_ISO_SINGLE_SHIFTING (coding) = 0;                        \
3447         break;                                                          \
3448       }                                                                 \
3449     else if (id == CODING_ISO_INVOKED_CHARSET (coding, 0))              \
3450       {                                                                 \
3451         EMIT_ONE_ASCII_BYTE (c1 & 0x7F);                                \
3452         break;                                                          \
3453       }                                                                 \
3454     else if (id == CODING_ISO_INVOKED_CHARSET (coding, 1))              \
3455       {                                                                 \
3456         EMIT_ONE_BYTE (c1 | 0x80);                                      \
3457         break;                                                          \
3458       }                                                                 \
3459     else                                                                \
3460       /* Since CHARSET is not yet invoked to any graphic planes, we     \
3461          must invoke it, or, at first, designate it to some graphic     \
3462          register.  Then repeat the loop to actually produce the        \
3463          character.  */                                                 \
3464       dst = encode_invocation_designation (charset, coding, dst,        \
3465                                            &produced_chars);            \
3466   } while (1)
3467
3468
3469 /* Produce codes for a DIMENSION2 character whose character set is
3470    CHARSET and whose position-codes are C1 and C2.  Designation and
3471    invocation codes are also produced in advance if necessary.  */
3472
3473 #define ENCODE_ISO_CHARACTER_DIMENSION2(charset, c1, c2)                \
3474   do {                                                                  \
3475     int id = CHARSET_ID (charset);                                      \
3476                                                                         \
3477     if ((CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_USE_OLDJIS)        \
3478         && id == charset_jisx0208)                                      \
3479       {                                                                 \
3480         id = charset_jisx0208_1978;                                     \
3481         charset = CHARSET_FROM_ID (id);                                 \
3482       }                                                                 \
3483                                                                         \
3484     if (CODING_ISO_SINGLE_SHIFTING (coding))                            \
3485       {                                                                 \
3486         if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS)     \
3487           EMIT_TWO_ASCII_BYTES ((c1) & 0x7F, (c2) & 0x7F);              \
3488         else                                                            \
3489           EMIT_TWO_BYTES ((c1) | 0x80, (c2) | 0x80);                    \
3490         CODING_ISO_SINGLE_SHIFTING (coding) = 0;                        \
3491         break;                                                          \
3492       }                                                                 \
3493     else if (id == CODING_ISO_INVOKED_CHARSET (coding, 0))              \
3494       {                                                                 \
3495         EMIT_TWO_ASCII_BYTES ((c1) & 0x7F, (c2) & 0x7F);                \
3496         break;                                                          \
3497       }                                                                 \
3498     else if (id == CODING_ISO_INVOKED_CHARSET (coding, 1))              \
3499       {                                                                 \
3500         EMIT_TWO_BYTES ((c1) | 0x80, (c2) | 0x80);                      \
3501         break;                                                          \
3502       }                                                                 \
3503     else                                                                \
3504       /* Since CHARSET is not yet invoked to any graphic planes, we     \
3505          must invoke it, or, at first, designate it to some graphic     \
3506          register.  Then repeat the loop to actually produce the        \
3507          character.  */                                                 \
3508       dst = encode_invocation_designation (charset, coding, dst,        \
3509                                            &produced_chars);            \
3510   } while (1)
3511
3512
3513 #define ENCODE_ISO_CHARACTER(charset, c)                                   \
3514   do {                                                                     \
3515     int code = ENCODE_CHAR ((charset),(c));                                \
3516                                                                            \
3517     if (CHARSET_DIMENSION (charset) == 1)                                  \
3518       ENCODE_ISO_CHARACTER_DIMENSION1 ((charset), code);                   \
3519     else                                                                   \
3520       ENCODE_ISO_CHARACTER_DIMENSION2 ((charset), code >> 8, code & 0xFF); \
3521   } while (0)
3522
3523
3524 /* Produce designation and invocation codes at a place pointed by DST
3525    to use CHARSET.  The element `spec.iso_2022' of *CODING is updated.
3526    Return new DST.  */
3527
3528 unsigned char *
3529 encode_invocation_designation (charset, coding, dst, p_nchars)
3530      struct charset *charset;
3531      struct coding_system *coding;
3532      unsigned char *dst;
3533      int *p_nchars;
3534 {
3535   int multibytep = coding->dst_multibyte;
3536   int produced_chars = *p_nchars;
3537   int reg;                      /* graphic register number */
3538   int id = CHARSET_ID (charset);
3539
3540   /* At first, check designations.  */
3541   for (reg = 0; reg < 4; reg++)
3542     if (id == CODING_ISO_DESIGNATION (coding, reg))
3543       break;
3544
3545   if (reg >= 4)
3546     {
3547       /* CHARSET is not yet designated to any graphic registers.  */
3548       /* At first check the requested designation.  */
3549       reg = CODING_ISO_REQUEST (coding, id);
3550       if (reg < 0)
3551         /* Since CHARSET requests no special designation, designate it
3552            to graphic register 0.  */
3553         reg = 0;
3554
3555       ENCODE_DESIGNATION (charset, reg, coding);
3556     }
3557
3558   if (CODING_ISO_INVOCATION (coding, 0) != reg
3559       && CODING_ISO_INVOCATION (coding, 1) != reg)
3560     {
3561       /* Since the graphic register REG is not invoked to any graphic
3562          planes, invoke it to graphic plane 0.  */
3563       switch (reg)
3564         {
3565         case 0:                 /* graphic register 0 */
3566           ENCODE_SHIFT_IN;
3567           break;
3568
3569         case 1:                 /* graphic register 1 */
3570           ENCODE_SHIFT_OUT;
3571           break;
3572
3573         case 2:                 /* graphic register 2 */
3574           if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT)
3575             ENCODE_SINGLE_SHIFT_2;
3576           else
3577             ENCODE_LOCKING_SHIFT_2;
3578           break;
3579
3580         case 3:                 /* graphic register 3 */
3581           if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT)
3582             ENCODE_SINGLE_SHIFT_3;
3583           else
3584             ENCODE_LOCKING_SHIFT_3;
3585           break;
3586         }
3587     }
3588
3589   *p_nchars = produced_chars;
3590   return dst;
3591 }
3592
3593 /* The following three macros produce codes for indicating direction
3594    of text.  */
3595 #define ENCODE_CONTROL_SEQUENCE_INTRODUCER                              \
3596   do {                                                                  \
3597     if (CODING_ISO_FLAGS (coding) == CODING_ISO_FLAG_SEVEN_BITS)        \
3598       EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, '[');                         \
3599     else                                                                \
3600       EMIT_ONE_BYTE (ISO_CODE_CSI);                                     \
3601   } while (0)
3602
3603
3604 #define ENCODE_DIRECTION_R2L()                  \
3605   do {                                          \
3606     ENCODE_CONTROL_SEQUENCE_INTRODUCER (dst);   \
3607     EMIT_TWO_ASCII_BYTES ('2', ']');            \
3608   } while (0)
3609
3610
3611 #define ENCODE_DIRECTION_L2R()                  \
3612   do {                                          \
3613     ENCODE_CONTROL_SEQUENCE_INTRODUCER (dst);   \
3614     EMIT_TWO_ASCII_BYTES ('0', ']');            \
3615   } while (0)
3616
3617
3618 /* Produce codes for designation and invocation to reset the graphic
3619    planes and registers to initial state.  */
3620 #define ENCODE_RESET_PLANE_AND_REGISTER()                               \
3621   do {                                                                  \
3622     int reg;                                                            \
3623     struct charset *charset;                                            \
3624                                                                         \
3625     if (CODING_ISO_INVOCATION (coding, 0) != 0)                         \
3626       ENCODE_SHIFT_IN;                                                  \
3627     for (reg = 0; reg < 4; reg++)                                       \
3628       if (CODING_ISO_INITIAL (coding, reg) >= 0                         \
3629           && (CODING_ISO_DESIGNATION (coding, reg)                      \
3630               != CODING_ISO_INITIAL (coding, reg)))                     \
3631         {                                                               \
3632           charset = CHARSET_FROM_ID (CODING_ISO_INITIAL (coding, reg)); \
3633           ENCODE_DESIGNATION (charset, reg, coding);                    \
3634         }                                                               \
3635   } while (0)
3636
3637
3638 /* Produce designation sequences of charsets in the line started from
3639    SRC to a place pointed by DST, and return updated DST.
3640
3641    If the current block ends before any end-of-line, we may fail to
3642    find all the necessary designations.  */
3643
3644 static unsigned char *
3645 encode_designation_at_bol (coding, charbuf, charbuf_end, dst)
3646      struct coding_system *coding;
3647      int *charbuf, *charbuf_end;
3648      unsigned char *dst;
3649 {
3650   struct charset *charset;
3651   /* Table of charsets to be designated to each graphic register.  */
3652   int r[4];
3653   int c, found = 0, reg;
3654   int produced_chars = 0;
3655   int multibytep = coding->dst_multibyte;
3656   Lisp_Object attrs;
3657   Lisp_Object charset_list;
3658
3659   attrs = CODING_ID_ATTRS (coding->id);
3660   charset_list = CODING_ATTR_CHARSET_LIST (attrs);
3661   if (EQ (charset_list, Qiso_2022))
3662     charset_list = Viso_2022_charset_list;
3663
3664   for (reg = 0; reg < 4; reg++)
3665     r[reg] = -1;
3666
3667   while (found < 4)
3668     {
3669       int id;
3670
3671       c = *charbuf++;
3672       if (c == '\n')
3673         break;
3674       charset = char_charset (c, charset_list, NULL);
3675       id = CHARSET_ID (charset);
3676       reg = CODING_ISO_REQUEST (coding, id);
3677       if (reg >= 0 && r[reg] < 0)
3678         {
3679           found++;
3680           r[reg] = id;
3681         }
3682     }
3683
3684   if (found)
3685     {
3686       for (reg = 0; reg < 4; reg++)
3687         if (r[reg] >= 0
3688             && CODING_ISO_DESIGNATION (coding, reg) != r[reg])
3689           ENCODE_DESIGNATION (CHARSET_FROM_ID (r[reg]), reg, coding);
3690     }
3691
3692   return dst;
3693 }
3694
3695 /* See the above "GENERAL NOTES on `encode_coding_XXX ()' functions".  */
3696
3697 static int
3698 encode_coding_iso_2022 (coding)
3699      struct coding_system *coding;
3700 {
3701   int multibytep = coding->dst_multibyte;
3702   int *charbuf = coding->charbuf;
3703   int *charbuf_end = charbuf + coding->charbuf_used;
3704   unsigned char *dst = coding->destination + coding->produced;
3705   unsigned char *dst_end = coding->destination + coding->dst_bytes;
3706   int safe_room = 16;
3707   int bol_designation
3708     = (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DESIGNATE_AT_BOL
3709        && CODING_ISO_BOL (coding));
3710   int produced_chars = 0;
3711   Lisp_Object attrs, eol_type, charset_list;
3712   int ascii_compatible;
3713   int c;
3714   int preferred_charset_id = -1;
3715
3716   CODING_GET_INFO (coding, attrs, charset_list);
3717   eol_type = CODING_ID_EOL_TYPE (coding->id);
3718   if (VECTORP (eol_type))
3719     eol_type = Qunix;
3720
3721   setup_iso_safe_charsets (attrs);
3722   /* Charset list may have been changed.  */
3723   charset_list = CODING_ATTR_CHARSET_LIST (attrs);              \
3724   coding->safe_charsets = (char *) SDATA (CODING_ATTR_SAFE_CHARSETS(attrs));
3725
3726   ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
3727
3728   while (charbuf < charbuf_end)
3729     {
3730       ASSURE_DESTINATION (safe_room);
3731
3732       if (bol_designation)
3733         {
3734           unsigned char *dst_prev = dst;
3735
3736           /* We have to produce designation sequences if any now.  */
3737           dst = encode_designation_at_bol (coding, charbuf, charbuf_end, dst);
3738           bol_designation = 0;
3739           /* We are sure that designation sequences are all ASCII bytes.  */
3740           produced_chars += dst - dst_prev;
3741         }
3742
3743       c = *charbuf++;
3744
3745       if (c < 0)
3746         {
3747           /* Handle an annotation.  */
3748           switch (*charbuf)
3749             {
3750             case CODING_ANNOTATE_COMPOSITION_MASK:
3751               /* Not yet implemented.  */
3752               break;
3753             case CODING_ANNOTATE_CHARSET_MASK:
3754               preferred_charset_id = charbuf[3];
3755               if (preferred_charset_id >= 0
3756                   && NILP (Fmemq (make_number (preferred_charset_id),
3757                                   charset_list)))
3758                 preferred_charset_id = -1;
3759               break;
3760             default:
3761               abort ();
3762             }
3763           charbuf += -c - 1;
3764           continue;
3765         }
3766
3767       /* Now encode the character C.  */
3768       if (c < 0x20 || c == 0x7F)
3769         {
3770           if (c == '\n'
3771               || (c == '\r' && EQ (eol_type, Qmac)))
3772             {
3773               if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_RESET_AT_EOL)
3774                 ENCODE_RESET_PLANE_AND_REGISTER ();
3775               if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_INIT_AT_BOL)
3776                 {
3777                   int i;
3778
3779                   for (i = 0; i < 4; i++)
3780                     CODING_ISO_DESIGNATION (coding, i)
3781                       = CODING_ISO_INITIAL (coding, i);
3782                 }
3783               bol_designation
3784                 = CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DESIGNATE_AT_BOL;
3785             }
3786           else if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_RESET_AT_CNTL)
3787             ENCODE_RESET_PLANE_AND_REGISTER ();
3788           EMIT_ONE_ASCII_BYTE (c);
3789         }
3790       else if (ASCII_CHAR_P (c))
3791         {
3792           if (ascii_compatible)
3793             EMIT_ONE_ASCII_BYTE (c);
3794           else
3795             {
3796               struct charset *charset = CHARSET_FROM_ID (charset_ascii);
3797               ENCODE_ISO_CHARACTER (charset, c);
3798             }
3799         }
3800       else if (CHAR_BYTE8_P (c))
3801         {
3802           c = CHAR_TO_BYTE8 (c);
3803           EMIT_ONE_BYTE (c);
3804         }
3805       else
3806         {
3807           struct charset *charset;
3808
3809           if (preferred_charset_id >= 0)
3810             {
3811               charset = CHARSET_FROM_ID (preferred_charset_id);
3812               if (! CHAR_CHARSET_P (c, charset))
3813                 charset = char_charset (c, charset_list, NULL);
3814             }
3815           else
3816             charset = char_charset (c, charset_list, NULL);
3817           if (!charset)
3818             {
3819               if (coding->mode & CODING_MODE_SAFE_ENCODING)
3820                 {
3821                   c = CODING_INHIBIT_CHARACTER_SUBSTITUTION;
3822                   charset = CHARSET_FROM_ID (charset_ascii);
3823                 }
3824               else
3825                 {
3826                   c = coding->default_char;
3827                   charset = char_charset (c, charset_list, NULL);
3828                 }
3829             }
3830           ENCODE_ISO_CHARACTER (charset, c);
3831         }
3832     }
3833
3834   if (coding->mode & CODING_MODE_LAST_BLOCK
3835       && CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_RESET_AT_EOL)
3836     {
3837       ASSURE_DESTINATION (safe_room);
3838       ENCODE_RESET_PLANE_AND_REGISTER ();
3839     }
3840   record_conversion_result (coding, CODING_RESULT_SUCCESS);
3841   CODING_ISO_BOL (coding) = bol_designation;
3842   coding->produced_char += produced_chars;
3843   coding->produced = dst - coding->destination;
3844   return 0;
3845 }
3846
3847 \f
3848 /*** 8,9. SJIS and BIG5 handlers ***/
3849
3850 /* Although SJIS and BIG5 are not ISO's coding system, they are used
3851    quite widely.  So, for the moment, Emacs supports them in the bare
3852    C code.  But, in the future, they may be supported only by CCL.  */
3853
3854 /* SJIS is a coding system encoding three character sets: ASCII, right
3855    half of JISX0201-Kana, and JISX0208.  An ASCII character is encoded
3856    as is.  A character of charset katakana-jisx0201 is encoded by
3857    "position-code + 0x80".  A character of charset japanese-jisx0208
3858    is encoded in 2-byte but two position-codes are divided and shifted
3859    so that it fit in the range below.
3860
3861    --- CODE RANGE of SJIS ---
3862    (character set)      (range)
3863    ASCII                0x00 .. 0x7F
3864    KATAKANA-JISX0201    0xA0 .. 0xDF
3865    JISX0208 (1st byte)  0x81 .. 0x9F and 0xE0 .. 0xEF
3866             (2nd byte)  0x40 .. 0x7E and 0x80 .. 0xFC
3867    -------------------------------
3868
3869 */
3870
3871 /* BIG5 is a coding system encoding two character sets: ASCII and
3872    Big5.  An ASCII character is encoded as is.  Big5 is a two-byte
3873    character set and is encoded in two-byte.
3874
3875    --- CODE RANGE of BIG5 ---
3876    (character set)      (range)
3877    ASCII                0x00 .. 0x7F
3878    Big5 (1st byte)      0xA1 .. 0xFE
3879         (2nd byte)      0x40 .. 0x7E and 0xA1 .. 0xFE
3880    --------------------------
3881
3882   */
3883
3884 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
3885    Check if a text is encoded in SJIS.  If it is, return
3886    CATEGORY_MASK_SJIS, else return 0.  */
3887
3888 static int
3889 detect_coding_sjis (coding, detect_info)
3890      struct coding_system *coding;
3891      struct coding_detection_info *detect_info;
3892 {
3893   const unsigned char *src = coding->source, *src_base;
3894   const unsigned char *src_end = coding->source + coding->src_bytes;
3895   int multibytep = coding->src_multibyte;
3896   int consumed_chars = 0;
3897   int found = 0;
3898   int c;
3899
3900   detect_info->checked |= CATEGORY_MASK_SJIS;
3901   /* A coding system of this category is always ASCII compatible.  */
3902   src += coding->head_ascii;
3903
3904   while (1)
3905     {
3906       src_base = src;
3907       ONE_MORE_BYTE (c);
3908       if (c < 0x80)
3909         continue;
3910       if ((c >= 0x81 && c <= 0x9F) || (c >= 0xE0 && c <= 0xEF))
3911         {
3912           ONE_MORE_BYTE (c);
3913           if (c < 0x40 || c == 0x7F || c > 0xFC)
3914             break;
3915           found = CATEGORY_MASK_SJIS;
3916         }
3917       else if (c >= 0xA0 && c < 0xE0)
3918         found = CATEGORY_MASK_SJIS;
3919       else
3920         break;
3921     }
3922   detect_info->rejected |= CATEGORY_MASK_SJIS;
3923   return 0;
3924
3925  no_more_source:
3926   if (src_base < src && coding->mode & CODING_MODE_LAST_BLOCK)
3927     {
3928       detect_info->rejected |= CATEGORY_MASK_SJIS;
3929       return 0;
3930     }
3931   detect_info->found |= found;
3932   return 1;
3933 }
3934
3935 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
3936    Check if a text is encoded in BIG5.  If it is, return
3937    CATEGORY_MASK_BIG5, else return 0.  */
3938
3939 static int
3940 detect_coding_big5 (coding, detect_info)
3941      struct coding_system *coding;
3942      struct coding_detection_info *detect_info;
3943 {
3944   const unsigned char *src = coding->source, *src_base;
3945   const unsigned char *src_end = coding->source + coding->src_bytes;
3946   int multibytep = coding->src_multibyte;
3947   int consumed_chars = 0;
3948   int found = 0;
3949   int c;
3950
3951   detect_info->checked |= CATEGORY_MASK_BIG5;
3952   /* A coding system of this category is always ASCII compatible.  */
3953   src += coding->head_ascii;
3954
3955   while (1)
3956     {
3957       src_base = src;
3958       ONE_MORE_BYTE (c);
3959       if (c < 0x80)
3960         continue;
3961       if (c >= 0xA1)
3962         {
3963           ONE_MORE_BYTE (c);
3964           if (c < 0x40 || (c >= 0x7F && c <= 0xA0))
3965             return 0;
3966           found = CATEGORY_MASK_BIG5;
3967         }
3968       else
3969         break;
3970     }
3971   detect_info->rejected |= CATEGORY_MASK_BIG5;
3972   return 0;
3973
3974  no_more_source:
3975   if (src_base < src && coding->mode & CODING_MODE_LAST_BLOCK)
3976     {
3977       detect_info->rejected |= CATEGORY_MASK_BIG5;
3978       return 0;
3979     }
3980   detect_info->found |= found;
3981   return 1;
3982 }
3983
3984 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".
3985    If SJIS_P is 1, decode SJIS text, else decode BIG5 test.  */
3986
3987 static void
3988 decode_coding_sjis (coding)
3989      struct coding_system *coding;
3990 {
3991   const unsigned char *src = coding->source + coding->consumed;
3992   const unsigned char *src_end = coding->source + coding->src_bytes;
3993   const unsigned char *src_base;
3994   int *charbuf = coding->charbuf;
3995   int *charbuf_end = charbuf + coding->charbuf_size - MAX_ANNOTATION_LENGTH;
3996   int consumed_chars = 0, consumed_chars_base;
3997   int multibytep = coding->src_multibyte;
3998   struct charset *charset_roman, *charset_kanji, *charset_kana;
3999   Lisp_Object attrs, charset_list, val;
4000   int char_offset = coding->produced_char;
4001   int last_offset = char_offset;
4002   int last_id = charset_ascii;
4003
4004   CODING_GET_INFO (coding, attrs, charset_list);
4005
4006   val = charset_list;
4007   charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
4008   charset_kana = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
4009   charset_kanji = CHARSET_FROM_ID (XINT (XCAR (val)));
4010
4011   while (1)
4012     {
4013       int c, c1;
4014       struct charset *charset;
4015
4016       src_base = src;
4017       consumed_chars_base = consumed_chars;
4018
4019       if (charbuf >= charbuf_end)
4020         break;
4021
4022       ONE_MORE_BYTE (c);
4023       if (c < 0)
4024         goto invalid_code;
4025       if (c < 0x80)
4026         charset = charset_roman;
4027       else if (c == 0x80)
4028         goto invalid_code;
4029       else
4030         {
4031           if (c >= 0xF0)
4032             goto invalid_code;
4033           if (c < 0xA0 || c >= 0xE0)
4034             {
4035               /* SJIS -> JISX0208 */
4036               ONE_MORE_BYTE (c1);
4037               if (c1 < 0x40 || c1 == 0x7F || c1 > 0xFC)
4038                 goto invalid_code;
4039               c = (c << 8) | c1;
4040               SJIS_TO_JIS (c);
4041               charset = charset_kanji;
4042             }
4043           else if (c > 0xA0)
4044             {
4045               /* SJIS -> JISX0201-Kana */
4046               c &= 0x7F;
4047               charset = charset_kana;
4048             }
4049           else
4050             goto invalid_code;
4051         }
4052       if (charset->id != charset_ascii
4053           && last_id != charset->id)
4054         {
4055           if (last_id != charset_ascii)
4056             ADD_CHARSET_DATA (charbuf, last_offset, char_offset, last_id);
4057           last_id = charset->id;
4058           last_offset = char_offset;
4059         }
4060       CODING_DECODE_CHAR (coding, src, src_base, src_end, charset, c, c);
4061       *charbuf++ = c;
4062       char_offset++;
4063       continue;
4064
4065     invalid_code:
4066       src = src_base;
4067       consumed_chars = consumed_chars_base;
4068       ONE_MORE_BYTE (c);
4069       *charbuf++ = c < 0 ? -c : BYTE8_TO_CHAR (c);
4070       char_offset++;
4071       coding->errors++;
4072     }
4073
4074  no_more_source:
4075   if (last_id != charset_ascii)
4076     ADD_CHARSET_DATA (charbuf, last_offset, char_offset, last_id);
4077   coding->consumed_char += consumed_chars_base;
4078   coding->consumed = src_base - coding->source;
4079   coding->charbuf_used = charbuf - coding->charbuf;
4080 }
4081
4082 static void
4083 decode_coding_big5 (coding)
4084      struct coding_system *coding;
4085 {
4086   const unsigned char *src = coding->source + coding->consumed;
4087   const unsigned char *src_end = coding->source + coding->src_bytes;
4088   const unsigned char *src_base;
4089   int *charbuf = coding->charbuf;
4090   int *charbuf_end = charbuf + coding->charbuf_size - MAX_ANNOTATION_LENGTH;
4091   int consumed_chars = 0, consumed_chars_base;
4092   int multibytep = coding->src_multibyte;
4093   struct charset *charset_roman, *charset_big5;
4094   Lisp_Object attrs, charset_list, val;
4095   int char_offset = coding->produced_char;
4096   int last_offset = char_offset;
4097   int last_id = charset_ascii;
4098
4099   CODING_GET_INFO (coding, attrs, charset_list);
4100   val = charset_list;
4101   charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
4102   charset_big5 = CHARSET_FROM_ID (XINT (XCAR (val)));
4103
4104   while (1)
4105     {
4106       int c, c1;
4107       struct charset *charset;
4108
4109       src_base = src;
4110       consumed_chars_base = consumed_chars;
4111
4112       if (charbuf >= charbuf_end)
4113         break;
4114
4115       ONE_MORE_BYTE (c);
4116
4117       if (c < 0)
4118         goto invalid_code;
4119       if (c < 0x80)
4120         charset = charset_roman;
4121       else
4122         {
4123           /* BIG5 -> Big5 */
4124           if (c < 0xA1 || c > 0xFE)
4125             goto invalid_code;
4126           ONE_MORE_BYTE (c1);
4127           if (c1 < 0x40 || (c1 > 0x7E && c1 < 0xA1) || c1 > 0xFE)
4128             goto invalid_code;
4129           c = c << 8 | c1;
4130           charset = charset_big5;
4131         }
4132       if (charset->id != charset_ascii
4133           && last_id != charset->id)
4134         {
4135           if (last_id != charset_ascii)
4136             ADD_CHARSET_DATA (charbuf, last_offset, char_offset, last_id);
4137           last_id = charset->id;
4138           last_offset = char_offset;
4139         }
4140       CODING_DECODE_CHAR (coding, src, src_base, src_end, charset, c, c);
4141       *charbuf++ = c;
4142       char_offset++;
4143       continue;
4144
4145     invalid_code:
4146       src = src_base;
4147       consumed_chars = consumed_chars_base;
4148       ONE_MORE_BYTE (c);
4149       *charbuf++ = c < 0 ? -c : BYTE8_TO_CHAR (c);
4150       char_offset++;
4151       coding->errors++;
4152     }
4153
4154  no_more_source:
4155   if (last_id != charset_ascii)
4156     ADD_CHARSET_DATA (charbuf, last_offset, char_offset, last_id);
4157   coding->consumed_char += consumed_chars_base;
4158   coding->consumed = src_base - coding->source;
4159   coding->charbuf_used = charbuf - coding->charbuf;
4160 }
4161
4162 /* See the above "GENERAL NOTES on `encode_coding_XXX ()' functions".
4163    This function can encode charsets `ascii', `katakana-jisx0201',
4164    `japanese-jisx0208', `chinese-big5-1', and `chinese-big5-2'.  We
4165    are sure that all these charsets are registered as official charset
4166    (i.e. do not have extended leading-codes).  Characters of other
4167    charsets are produced without any encoding.  If SJIS_P is 1, encode
4168    SJIS text, else encode BIG5 text.  */
4169
4170 static int
4171 encode_coding_sjis (coding)
4172      struct coding_system *coding;
4173 {
4174   int multibytep = coding->dst_multibyte;
4175   int *charbuf = coding->charbuf;
4176   int *charbuf_end = charbuf + coding->charbuf_used;
4177   unsigned char *dst = coding->destination + coding->produced;
4178   unsigned char *dst_end = coding->destination + coding->dst_bytes;
4179   int safe_room = 4;
4180   int produced_chars = 0;
4181   Lisp_Object attrs, charset_list, val;
4182   int ascii_compatible;
4183   struct charset *charset_roman, *charset_kanji, *charset_kana;
4184   int c;
4185
4186   CODING_GET_INFO (coding, attrs, charset_list);
4187   val = charset_list;
4188   charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
4189   charset_kana = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
4190   charset_kanji = CHARSET_FROM_ID (XINT (XCAR (val)));
4191
4192   ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
4193
4194   while (charbuf < charbuf_end)
4195     {
4196       ASSURE_DESTINATION (safe_room);
4197       c = *charbuf++;
4198       /* Now encode the character C.  */
4199       if (ASCII_CHAR_P (c) && ascii_compatible)
4200         EMIT_ONE_ASCII_BYTE (c);
4201       else if (CHAR_BYTE8_P (c))
4202         {
4203           c = CHAR_TO_BYTE8 (c);
4204           EMIT_ONE_BYTE (c);
4205         }
4206       else
4207         {
4208           unsigned code;
4209           struct charset *charset = char_charset (c, charset_list, &code);
4210
4211           if (!charset)
4212             {
4213               if (coding->mode & CODING_MODE_SAFE_ENCODING)
4214                 {
4215                   code = CODING_INHIBIT_CHARACTER_SUBSTITUTION;
4216                   charset = CHARSET_FROM_ID (charset_ascii);
4217                 }
4218               else
4219                 {
4220                   c = coding->default_char;
4221                   charset = char_charset (c, charset_list, &code);
4222                 }
4223             }
4224           if (code == CHARSET_INVALID_CODE (charset))
4225             abort ();
4226           if (charset == charset_kanji)
4227             {
4228               int c1, c2;
4229               JIS_TO_SJIS (code);
4230               c1 = code >> 8, c2 = code & 0xFF;
4231               EMIT_TWO_BYTES (c1, c2);
4232             }
4233           else if (charset == charset_kana)
4234             EMIT_ONE_BYTE (code | 0x80);
4235           else
4236             EMIT_ONE_ASCII_BYTE (code & 0x7F);
4237         }
4238     }
4239   record_conversion_result (coding, CODING_RESULT_SUCCESS);
4240   coding->produced_char += produced_chars;
4241   coding->produced = dst - coding->destination;
4242   return 0;
4243 }
4244
4245 static int
4246 encode_coding_big5 (coding)
4247      struct coding_system *coding;
4248 {
4249   int multibytep = coding->dst_multibyte;
4250   int *charbuf = coding->charbuf;
4251   int *charbuf_end = charbuf + coding->charbuf_used;
4252   unsigned char *dst = coding->destination + coding->produced;
4253   unsigned char *dst_end = coding->destination + coding->dst_bytes;
4254   int safe_room = 4;
4255   int produced_chars = 0;
4256   Lisp_Object attrs, charset_list, val;
4257   int ascii_compatible;
4258   struct charset *charset_roman, *charset_big5;
4259   int c;
4260
4261   CODING_GET_INFO (coding, attrs, charset_list);
4262   val = charset_list;
4263   charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
4264   charset_big5 = CHARSET_FROM_ID (XINT (XCAR (val)));
4265   ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
4266
4267   while (charbuf < charbuf_end)
4268     {
4269       ASSURE_DESTINATION (safe_room);
4270       c = *charbuf++;
4271       /* Now encode the character C.  */
4272       if (ASCII_CHAR_P (c) && ascii_compatible)
4273         EMIT_ONE_ASCII_BYTE (c);
4274       else if (CHAR_BYTE8_P (c))
4275         {
4276           c = CHAR_TO_BYTE8 (c);
4277           EMIT_ONE_BYTE (c);
4278         }
4279       else
4280         {
4281           unsigned code;
4282           struct charset *charset = char_charset (c, charset_list, &code);
4283
4284           if (! charset)
4285             {
4286               if (coding->mode & CODING_MODE_SAFE_ENCODING)
4287                 {
4288                   code = CODING_INHIBIT_CHARACTER_SUBSTITUTION;
4289                   charset = CHARSET_FROM_ID (charset_ascii);
4290                 }
4291               else
4292                 {
4293                   c = coding->default_char;
4294                   charset = char_charset (c, charset_list, &code);
4295                 }
4296             }
4297           if (code == CHARSET_INVALID_CODE (charset))
4298             abort ();
4299           if (charset == charset_big5)
4300             {
4301               int c1, c2;
4302
4303               c1 = code >> 8, c2 = code & 0xFF;
4304               EMIT_TWO_BYTES (c1, c2);
4305             }
4306           else
4307             EMIT_ONE_ASCII_BYTE (code & 0x7F);
4308         }
4309     }
4310   record_conversion_result (coding, CODING_RESULT_SUCCESS);
4311   coding->produced_char += produced_chars;
4312   coding->produced = dst - coding->destination;
4313   return 0;
4314 }
4315
4316 \f
4317 /*** 10. CCL handlers ***/
4318
4319 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
4320    Check if a text is encoded in a coding system of which
4321    encoder/decoder are written in CCL program.  If it is, return
4322    CATEGORY_MASK_CCL, else return 0.  */
4323
4324 static int
4325 detect_coding_ccl (coding, detect_info)
4326      struct coding_system *coding;
4327      struct coding_detection_info *detect_info;
4328 {
4329   const unsigned char *src = coding->source, *src_base;
4330   const unsigned char *src_end = coding->source + coding->src_bytes;
4331   int multibytep = coding->src_multibyte;
4332   int consumed_chars = 0;
4333   int found = 0;
4334   unsigned char *valids = CODING_CCL_VALIDS (coding);
4335   int head_ascii = coding->head_ascii;
4336   Lisp_Object attrs;
4337
4338   detect_info->checked |= CATEGORY_MASK_CCL;
4339
4340   coding = &coding_categories[coding_category_ccl];
4341   attrs = CODING_ID_ATTRS (coding->id);
4342   if (! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
4343     src += head_ascii;
4344
4345   while (1)
4346     {
4347       int c;
4348
4349       src_base = src;
4350       ONE_MORE_BYTE (c);
4351       if (c < 0 || ! valids[c])
4352         break;
4353       if ((valids[c] > 1))
4354         found = CATEGORY_MASK_CCL;
4355     }
4356   detect_info->rejected |= CATEGORY_MASK_CCL;
4357   return 0;
4358
4359  no_more_source:
4360   detect_info->found |= found;
4361   return 1;
4362 }
4363
4364 static void
4365 decode_coding_ccl (coding)
4366      struct coding_system *coding;
4367 {
4368   const unsigned char *src = coding->source + coding->consumed;
4369   const unsigned char *src_end = coding->source + coding->src_bytes;
4370   int *charbuf = coding->charbuf;
4371   int *charbuf_end = charbuf + coding->charbuf_size;
4372   int consumed_chars = 0;
4373   int multibytep = coding->src_multibyte;
4374   struct ccl_program ccl;
4375   int source_charbuf[1024];
4376   int source_byteidx[1024];
4377   Lisp_Object attrs, charset_list;
4378
4379   CODING_GET_INFO (coding, attrs, charset_list);
4380   setup_ccl_program (&ccl, CODING_CCL_DECODER (coding));
4381
4382   while (src < src_end)
4383     {
4384       const unsigned char *p = src;
4385       int *source, *source_end;
4386       int i = 0;
4387
4388       if (multibytep)
4389         while (i < 1024 && p < src_end)
4390           {
4391             source_byteidx[i] = p - src;
4392             source_charbuf[i++] = STRING_CHAR_ADVANCE (p);
4393           }
4394       else
4395         while (i < 1024 && p < src_end)
4396           source_charbuf[i++] = *p++;
4397
4398       if (p == src_end && coding->mode & CODING_MODE_LAST_BLOCK)
4399         ccl.last_block = 1;
4400
4401       source = source_charbuf;
4402       source_end = source + i;
4403       while (source < source_end)
4404         {
4405           ccl_driver (&ccl, source, charbuf,
4406                       source_end - source, charbuf_end - charbuf,
4407                       charset_list);
4408           source += ccl.consumed;
4409           charbuf += ccl.produced;
4410           if (ccl.status != CCL_STAT_SUSPEND_BY_DST)
4411             break;
4412         }
4413       if (source < source_end)
4414         src += source_byteidx[source - source_charbuf];
4415       else
4416         src = p;
4417       consumed_chars += source - source_charbuf;
4418
4419       if (ccl.status != CCL_STAT_SUSPEND_BY_SRC
4420           && ccl.status != CODING_RESULT_INSUFFICIENT_SRC)
4421         break;
4422     }
4423
4424   switch (ccl.status)
4425     {
4426     case CCL_STAT_SUSPEND_BY_SRC:
4427       record_conversion_result (coding, CODING_RESULT_INSUFFICIENT_SRC);
4428       break;
4429     case CCL_STAT_SUSPEND_BY_DST:
4430       break;
4431     case CCL_STAT_QUIT:
4432     case CCL_STAT_INVALID_CMD:
4433       record_conversion_result (coding, CODING_RESULT_INTERRUPT);
4434       break;
4435     default:
4436       record_conversion_result (coding, CODING_RESULT_SUCCESS);
4437       break;
4438     }
4439   coding->consumed_char += consumed_chars;
4440   coding->consumed = src - coding->source;
4441   coding->charbuf_used = charbuf - coding->charbuf;
4442 }
4443
4444 static int
4445 encode_coding_ccl (coding)
4446      struct coding_system *coding;
4447 {
4448   struct ccl_program ccl;
4449   int multibytep = coding->dst_multibyte;
4450   int *charbuf = coding->charbuf;
4451   int *charbuf_end = charbuf + coding->charbuf_used;
4452   unsigned char *dst = coding->destination + coding->produced;
4453   unsigned char *dst_end = coding->destination + coding->dst_bytes;
4454   unsigned char *adjusted_dst_end = dst_end - 1;
4455   int destination_charbuf[1024];
4456   int i, produced_chars = 0;
4457   Lisp_Object attrs, charset_list;
4458
4459   CODING_GET_INFO (coding, attrs, charset_list);
4460   setup_ccl_program (&ccl, CODING_CCL_ENCODER (coding));
4461
4462   ccl.last_block = coding->mode & CODING_MODE_LAST_BLOCK;
4463   ccl.dst_multibyte = coding->dst_multibyte;
4464
4465   while (charbuf < charbuf_end && dst < adjusted_dst_end)
4466     {
4467       int dst_bytes = dst_end - dst;
4468       if (dst_bytes > 1024)
4469         dst_bytes = 1024;
4470
4471       ccl_driver (&ccl, charbuf, destination_charbuf,
4472                   charbuf_end - charbuf, dst_bytes, charset_list);
4473       charbuf += ccl.consumed;
4474       if (multibytep)
4475         for (i = 0; i < ccl.produced; i++)
4476           EMIT_ONE_BYTE (destination_charbuf[i] & 0xFF);
4477       else
4478         {
4479           for (i = 0; i < ccl.produced; i++)
4480             *dst++ = destination_charbuf[i] & 0xFF;
4481           produced_chars += ccl.produced;
4482         }
4483     }
4484
4485   switch (ccl.status)
4486     {
4487     case CCL_STAT_SUSPEND_BY_SRC:
4488       record_conversion_result (coding, CODING_RESULT_INSUFFICIENT_SRC);
4489       break;
4490     case CCL_STAT_SUSPEND_BY_DST:
4491       record_conversion_result (coding, CODING_RESULT_INSUFFICIENT_DST);
4492       break;
4493     case CCL_STAT_QUIT:
4494     case CCL_STAT_INVALID_CMD:
4495       record_conversion_result (coding, CODING_RESULT_INTERRUPT);
4496       break;
4497     default:
4498       record_conversion_result (coding, CODING_RESULT_SUCCESS);
4499       break;
4500     }
4501
4502   coding->produced_char += produced_chars;
4503   coding->produced = dst - coding->destination;
4504   return 0;
4505 }
4506
4507
4508 \f
4509 /*** 10, 11. no-conversion handlers ***/
4510
4511 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".  */
4512
4513 static void
4514 decode_coding_raw_text (coding)
4515      struct coding_system *coding;
4516 {
4517   coding->chars_at_source = 1;
4518   coding->consumed_char = 0;
4519   coding->consumed = 0;
4520   record_conversion_result (coding, CODING_RESULT_SUCCESS);
4521 }
4522
4523 static int
4524 encode_coding_raw_text (coding)
4525      struct coding_system *coding;
4526 {
4527   int multibytep = coding->dst_multibyte;
4528   int *charbuf = coding->charbuf;
4529   int *charbuf_end = coding->charbuf + coding->charbuf_used;
4530   unsigned char *dst = coding->destination + coding->produced;
4531   unsigned char *dst_end = coding->destination + coding->dst_bytes;
4532   int produced_chars = 0;
4533   int c;
4534
4535   if (multibytep)
4536     {
4537       int safe_room = MAX_MULTIBYTE_LENGTH * 2;
4538
4539       if (coding->src_multibyte)
4540         while (charbuf < charbuf_end)
4541           {
4542             ASSURE_DESTINATION (safe_room);
4543             c = *charbuf++;
4544             if (ASCII_CHAR_P (c))
4545               EMIT_ONE_ASCII_BYTE (c);
4546             else if (CHAR_BYTE8_P (c))
4547               {
4548                 c = CHAR_TO_BYTE8 (c);
4549                 EMIT_ONE_BYTE (c);
4550               }
4551             else
4552               {
4553                 unsigned char str[MAX_MULTIBYTE_LENGTH], *p0 = str, *p1 = str;
4554
4555                 CHAR_STRING_ADVANCE (c, p1);
4556                 while (p0 < p1)
4557                   {
4558                     EMIT_ONE_BYTE (*p0);
4559                     p0++;
4560                   }
4561               }
4562           }
4563       else
4564         while (charbuf < charbuf_end)
4565           {
4566             ASSURE_DESTINATION (safe_room);
4567             c = *charbuf++;
4568             EMIT_ONE_BYTE (c);
4569           }
4570     }
4571   else
4572     {
4573       if (coding->src_multibyte)
4574         {
4575           int safe_room = MAX_MULTIBYTE_LENGTH;
4576
4577           while (charbuf < charbuf_end)
4578             {
4579               ASSURE_DESTINATION (safe_room);
4580               c = *charbuf++;
4581               if (ASCII_CHAR_P (c))
4582                 *dst++ = c;
4583               else if (CHAR_BYTE8_P (c))
4584                 *dst++ = CHAR_TO_BYTE8 (c);
4585               else
4586                 CHAR_STRING_ADVANCE (c, dst);
4587               produced_chars++;
4588             }
4589         }
4590       else
4591         {
4592           ASSURE_DESTINATION (charbuf_end - charbuf);
4593           while (charbuf < charbuf_end && dst < dst_end)
4594             *dst++ = *charbuf++;
4595           produced_chars = dst - (coding->destination + coding->dst_bytes);
4596         }
4597     }
4598   record_conversion_result (coding, CODING_RESULT_SUCCESS);
4599   coding->produced_char += produced_chars;
4600   coding->produced = dst - coding->destination;
4601   return 0;
4602 }
4603
4604 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
4605    Check if a text is encoded in a charset-based coding system.  If it
4606    is, return 1, else return 0.  */
4607
4608 static int
4609 detect_coding_charset (coding, detect_info)
4610      struct coding_system *coding;
4611      struct coding_detection_info *detect_info;
4612 {
4613   const unsigned char *src = coding->source, *src_base;
4614   const unsigned char *src_end = coding->source + coding->src_bytes;
4615   int multibytep = coding->src_multibyte;
4616   int consumed_chars = 0;
4617   Lisp_Object attrs, valids;
4618   int found = 0;
4619
4620   detect_info->checked |= CATEGORY_MASK_CHARSET;
4621
4622   coding = &coding_categories[coding_category_charset];
4623   attrs = CODING_ID_ATTRS (coding->id);
4624   valids = AREF (attrs, coding_attr_charset_valids);
4625
4626   if (! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
4627     src += coding->head_ascii;
4628
4629   while (1)
4630     {
4631       int c;
4632
4633       src_base = src;
4634       ONE_MORE_BYTE (c);
4635       if (c < 0)
4636         continue;
4637       if (NILP (AREF (valids, c)))
4638         break;
4639       if (c >= 0x80)
4640         found = CATEGORY_MASK_CHARSET;
4641     }
4642   detect_info->rejected |= CATEGORY_MASK_CHARSET;
4643   return 0;
4644
4645  no_more_source:
4646   detect_info->found |= found;
4647   return 1;
4648 }
4649
4650 static void
4651 decode_coding_charset (coding)
4652      struct coding_system *coding;
4653 {
4654   const unsigned char *src = coding->source + coding->consumed;
4655   const unsigned char *src_end = coding->source + coding->src_bytes;
4656   const unsigned char *src_base;
4657   int *charbuf = coding->charbuf;
4658   int *charbuf_end = charbuf + coding->charbuf_size - MAX_ANNOTATION_LENGTH;
4659   int consumed_chars = 0, consumed_chars_base;
4660   int multibytep = coding->src_multibyte;
4661   Lisp_Object attrs, charset_list, valids;
4662   int char_offset = coding->produced_char;
4663   int last_offset = char_offset;
4664   int last_id = charset_ascii;
4665
4666   CODING_GET_INFO (coding, attrs, charset_list);
4667   valids = AREF (attrs, coding_attr_charset_valids);
4668
4669   while (1)
4670     {
4671       int c;
4672       Lisp_Object val;
4673       struct charset *charset;
4674       int dim;
4675       int len = 1;
4676       unsigned code;
4677
4678       src_base = src;
4679       consumed_chars_base = consumed_chars;
4680
4681       if (charbuf >= charbuf_end)
4682         break;
4683
4684       ONE_MORE_BYTE (c);
4685       if (c < 0)
4686         goto invalid_code;
4687       code = c;
4688
4689       val = AREF (valids, c);
4690       if (NILP (val))
4691         goto invalid_code;
4692       if (INTEGERP (val))
4693         {
4694           charset = CHARSET_FROM_ID (XFASTINT (val));
4695           dim = CHARSET_DIMENSION (charset);
4696           while (len < dim)
4697             {
4698               ONE_MORE_BYTE (c);
4699               code = (code << 8) | c;
4700               len++;
4701             }
4702           CODING_DECODE_CHAR (coding, src, src_base, src_end,
4703                               charset, code, c);
4704         }
4705       else
4706         {
4707           /* VAL is a list of charset IDs.  It is assured that the
4708              list is sorted by charset dimensions (smaller one
4709              comes first).  */
4710           while (CONSP (val))
4711             {
4712               charset = CHARSET_FROM_ID (XFASTINT (XCAR (val)));
4713               dim = CHARSET_DIMENSION (charset);
4714               while (len < dim)
4715                 {
4716                   ONE_MORE_BYTE (c);
4717                   code = (code << 8) | c;
4718                   len++;
4719                 }
4720               CODING_DECODE_CHAR (coding, src, src_base,
4721                                   src_end, charset, code, c);
4722               if (c >= 0)
4723                 break;
4724               val = XCDR (val);
4725             }
4726         }
4727       if (c < 0)
4728         goto invalid_code;
4729       if (charset->id != charset_ascii
4730           && last_id != charset->id)
4731         {
4732           if (last_id != charset_ascii)
4733             ADD_CHARSET_DATA (charbuf, last_offset, char_offset, last_id);
4734           last_id = charset->id;
4735           last_offset = char_offset;
4736         }
4737
4738       *charbuf++ = c;
4739       char_offset++;
4740       continue;
4741
4742     invalid_code:
4743       src = src_base;
4744       consumed_chars = consumed_chars_base;
4745       ONE_MORE_BYTE (c);
4746       *charbuf++ = c < 0 ? -c : ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
4747       char_offset++;
4748       coding->errors++;
4749     }
4750
4751  no_more_source:
4752   if (last_id != charset_ascii)
4753     ADD_CHARSET_DATA (charbuf, last_offset, char_offset, last_id);
4754   coding->consumed_char += consumed_chars_base;
4755   coding->consumed = src_base - coding->source;
4756   coding->charbuf_used = charbuf - coding->charbuf;
4757 }
4758
4759 static int
4760 encode_coding_charset (coding)
4761      struct coding_system *coding;
4762 {
4763   int multibytep = coding->dst_multibyte;
4764   int *charbuf = coding->charbuf;
4765   int *charbuf_end = charbuf + coding->charbuf_used;
4766   unsigned char *dst = coding->destination + coding->produced;
4767   unsigned char *dst_end = coding->destination + coding->dst_bytes;
4768   int safe_room = MAX_MULTIBYTE_LENGTH;
4769   int produced_chars = 0;
4770   Lisp_Object attrs, charset_list;
4771   int ascii_compatible;
4772   int c;
4773
4774   CODING_GET_INFO (coding, attrs, charset_list);
4775   ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
4776
4777   while (charbuf < charbuf_end)
4778     {
4779       struct charset *charset;
4780       unsigned code;
4781
4782       ASSURE_DESTINATION (safe_room);
4783       c = *charbuf++;
4784       if (ascii_compatible && ASCII_CHAR_P (c))
4785         EMIT_ONE_ASCII_BYTE (c);
4786       else if (CHAR_BYTE8_P (c))
4787         {
4788           c = CHAR_TO_BYTE8 (c);
4789           EMIT_ONE_BYTE (c);
4790         }
4791       else
4792         {
4793           charset = char_charset (c, charset_list, &code);
4794           if (charset)
4795             {
4796               if (CHARSET_DIMENSION (charset) == 1)
4797                 EMIT_ONE_BYTE (code);
4798               else if (CHARSET_DIMENSION (charset) == 2)
4799                 EMIT_TWO_BYTES (code >> 8, code & 0xFF);
4800               else if (CHARSET_DIMENSION (charset) == 3)
4801                 EMIT_THREE_BYTES (code >> 16, (code >> 8) & 0xFF, code & 0xFF);
4802               else
4803                 EMIT_FOUR_BYTES (code >> 24, (code >> 16) & 0xFF,
4804                                  (code >> 8) & 0xFF, code & 0xFF);
4805             }
4806           else
4807             {
4808               if (coding->mode & CODING_MODE_SAFE_ENCODING)
4809                 c = CODING_INHIBIT_CHARACTER_SUBSTITUTION;
4810               else
4811                 c = coding->default_char;
4812               EMIT_ONE_BYTE (c);
4813             }
4814         }
4815     }
4816
4817   record_conversion_result (coding, CODING_RESULT_SUCCESS);
4818   coding->produced_char += produced_chars;
4819   coding->produced = dst - coding->destination;
4820   return 0;
4821 }
4822
4823 \f
4824 /*** 7. C library functions ***/
4825
4826 /* Setup coding context CODING from information about CODING_SYSTEM.
4827    If CODING_SYSTEM is nil, `no-conversion' is assumed.  If
4828    CODING_SYSTEM is invalid, signal an error.  */
4829
4830 void
4831 setup_coding_system (coding_system, coding)
4832      Lisp_Object coding_system;
4833      struct coding_system *coding;
4834 {
4835   Lisp_Object attrs;
4836   Lisp_Object eol_type;
4837   Lisp_Object coding_type;
4838   Lisp_Object val;
4839
4840   if (NILP (coding_system))
4841     coding_system = Qno_conversion;
4842
4843   CHECK_CODING_SYSTEM_GET_ID (coding_system, coding->id);
4844
4845   attrs = CODING_ID_ATTRS (coding->id);
4846   eol_type = CODING_ID_EOL_TYPE (coding->id);
4847
4848   coding->mode = 0;
4849   coding->head_ascii = -1;
4850   coding->common_flags
4851     = (VECTORP (eol_type) ? CODING_REQUIRE_DETECTION_MASK : 0);
4852   if (! NILP (CODING_ATTR_POST_READ (attrs)))
4853     coding->common_flags |= CODING_REQUIRE_DECODING_MASK;
4854   if (! NILP (CODING_ATTR_PRE_WRITE (attrs)))
4855     coding->common_flags |= CODING_REQUIRE_ENCODING_MASK;
4856   if (! NILP (CODING_ATTR_FOR_UNIBYTE (attrs)))
4857     coding->common_flags |= CODING_FOR_UNIBYTE_MASK;
4858
4859   val = CODING_ATTR_SAFE_CHARSETS (attrs);
4860   coding->max_charset_id = SCHARS (val) - 1;
4861   coding->safe_charsets = (char *) SDATA (val);
4862   coding->default_char = XINT (CODING_ATTR_DEFAULT_CHAR (attrs));
4863
4864   coding_type = CODING_ATTR_TYPE (attrs);
4865   if (EQ (coding_type, Qundecided))
4866     {
4867       coding->detector = NULL;
4868       coding->decoder = decode_coding_raw_text;
4869       coding->encoder = encode_coding_raw_text;
4870       coding->common_flags |= CODING_REQUIRE_DETECTION_MASK;
4871     }
4872   else if (EQ (coding_type, Qiso_2022))
4873     {
4874       int i;
4875       int flags = XINT (AREF (attrs, coding_attr_iso_flags));
4876
4877       /* Invoke graphic register 0 to plane 0.  */
4878       CODING_ISO_INVOCATION (coding, 0) = 0;
4879       /* Invoke graphic register 1 to plane 1 if we can use 8-bit.  */
4880       CODING_ISO_INVOCATION (coding, 1)
4881         = (flags & CODING_ISO_FLAG_SEVEN_BITS ? -1 : 1);
4882       /* Setup the initial status of designation.  */
4883       for (i = 0; i < 4; i++)
4884         CODING_ISO_DESIGNATION (coding, i) = CODING_ISO_INITIAL (coding, i);
4885       /* Not single shifting initially.  */
4886       CODING_ISO_SINGLE_SHIFTING (coding) = 0;
4887       /* Beginning of buffer should also be regarded as bol. */
4888       CODING_ISO_BOL (coding) = 1;
4889       coding->detector = detect_coding_iso_2022;
4890       coding->decoder = decode_coding_iso_2022;
4891       coding->encoder = encode_coding_iso_2022;
4892       if (flags & CODING_ISO_FLAG_SAFE)
4893         coding->mode |= CODING_MODE_SAFE_ENCODING;
4894       coding->common_flags
4895         |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK
4896             | CODING_REQUIRE_FLUSHING_MASK);
4897       if (flags & CODING_ISO_FLAG_COMPOSITION)
4898         coding->common_flags |= CODING_ANNOTATE_COMPOSITION_MASK;
4899       if (flags & CODING_ISO_FLAG_DESIGNATION)
4900         coding->common_flags |= CODING_ANNOTATE_CHARSET_MASK;
4901       if (flags & CODING_ISO_FLAG_FULL_SUPPORT)
4902         {
4903           setup_iso_safe_charsets (attrs);
4904           val = CODING_ATTR_SAFE_CHARSETS (attrs);
4905           coding->max_charset_id = SCHARS (val) - 1;
4906           coding->safe_charsets = (char *) SDATA (val);
4907         }
4908       CODING_ISO_FLAGS (coding) = flags;
4909     }
4910   else if (EQ (coding_type, Qcharset))
4911     {
4912       coding->detector = detect_coding_charset;
4913       coding->decoder = decode_coding_charset;
4914       coding->encoder = encode_coding_charset;
4915       coding->common_flags
4916         |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
4917     }
4918   else if (EQ (coding_type, Qutf_8))
4919     {
4920       coding->detector = detect_coding_utf_8;
4921       coding->decoder = decode_coding_utf_8;
4922       coding->encoder = encode_coding_utf_8;
4923       coding->common_flags
4924         |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
4925     }
4926   else if (EQ (coding_type, Qutf_16))
4927     {
4928       val = AREF (attrs, coding_attr_utf_16_bom);
4929       CODING_UTF_16_BOM (coding) = (CONSP (val) ? utf_16_detect_bom
4930                                     : EQ (val, Qt) ? utf_16_with_bom
4931                                     : utf_16_without_bom);
4932       val = AREF (attrs, coding_attr_utf_16_endian);
4933       CODING_UTF_16_ENDIAN (coding) = (EQ (val, Qbig) ? utf_16_big_endian
4934                                        : utf_16_little_endian);
4935       CODING_UTF_16_SURROGATE (coding) = 0;
4936       coding->detector = detect_coding_utf_16;
4937       coding->decoder = decode_coding_utf_16;
4938       coding->encoder = encode_coding_utf_16;
4939       coding->common_flags
4940         |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
4941       if (CODING_UTF_16_BOM (coding) == utf_16_detect_bom)
4942         coding->common_flags |= CODING_REQUIRE_DETECTION_MASK;
4943     }
4944   else if (EQ (coding_type, Qccl))
4945     {
4946       coding->detector = detect_coding_ccl;
4947       coding->decoder = decode_coding_ccl;
4948       coding->encoder = encode_coding_ccl;
4949       coding->common_flags
4950         |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK
4951             | CODING_REQUIRE_FLUSHING_MASK);
4952     }
4953   else if (EQ (coding_type, Qemacs_mule))
4954     {
4955       coding->detector = detect_coding_emacs_mule;
4956       coding->decoder = decode_coding_emacs_mule;
4957       coding->encoder = encode_coding_emacs_mule;
4958       coding->common_flags
4959         |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
4960       if (! NILP (AREF (attrs, coding_attr_emacs_mule_full))
4961           && ! EQ (CODING_ATTR_CHARSET_LIST (attrs), Vemacs_mule_charset_list))
4962         {
4963           Lisp_Object tail, safe_charsets;
4964           int max_charset_id = 0;
4965
4966           for (tail = Vemacs_mule_charset_list; CONSP (tail);
4967                tail = XCDR (tail))
4968             if (max_charset_id < XFASTINT (XCAR (tail)))
4969               max_charset_id = XFASTINT (XCAR (tail));
4970           safe_charsets = Fmake_string (make_number (max_charset_id + 1),
4971                                         make_number (255));
4972           for (tail = Vemacs_mule_charset_list; CONSP (tail);
4973                tail = XCDR (tail))
4974             SSET (safe_charsets, XFASTINT (XCAR (tail)), 0);
4975           coding->max_charset_id = max_charset_id;
4976           coding->safe_charsets = (char *) SDATA (safe_charsets);
4977         }
4978     }
4979   else if (EQ (coding_type, Qshift_jis))
4980     {
4981       coding->detector = detect_coding_sjis;
4982       coding->decoder = decode_coding_sjis;
4983       coding->encoder = encode_coding_sjis;
4984       coding->common_flags
4985         |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
4986     }
4987   else if (EQ (coding_type, Qbig5))
4988     {
4989       coding->detector = detect_coding_big5;
4990       coding->decoder = decode_coding_big5;
4991       coding->encoder = encode_coding_big5;
4992       coding->common_flags
4993         |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
4994     }
4995   else                          /* EQ (coding_type, Qraw_text) */
4996     {
4997       coding->detector = NULL;
4998       coding->decoder = decode_coding_raw_text;
4999       coding->encoder = encode_coding_raw_text;
5000     }
5001
5002   return;
5003 }
5004
5005 /* Return raw-text or one of its subsidiaries that has the same
5006    eol_type as CODING-SYSTEM.  */
5007
5008 Lisp_Object
5009 raw_text_coding_system (coding_system)
5010      Lisp_Object coding_system;
5011 {
5012   Lisp_Object spec, attrs;
5013   Lisp_Object eol_type, raw_text_eol_type;
5014
5015   if (NILP (coding_system))
5016     return Qraw_text;
5017   spec = CODING_SYSTEM_SPEC (coding_system);
5018   attrs = AREF (spec, 0);
5019
5020   if (EQ (CODING_ATTR_TYPE (attrs), Qraw_text))
5021     return coding_system;
5022
5023   eol_type = AREF (spec, 2);
5024   if (VECTORP (eol_type))
5025     return Qraw_text;
5026   spec = CODING_SYSTEM_SPEC (Qraw_text);
5027   raw_text_eol_type = AREF (spec, 2);
5028   return (EQ (eol_type, Qunix) ? AREF (raw_text_eol_type, 0)
5029           : EQ (eol_type, Qdos) ? AREF (raw_text_eol_type, 1)
5030           : AREF (raw_text_eol_type, 2));
5031 }
5032
5033
5034 /* If CODING_SYSTEM doesn't specify end-of-line format but PARENT
5035    does, return one of the subsidiary that has the same eol-spec as
5036    PARENT.  Otherwise, return CODING_SYSTEM.  */
5037
5038 Lisp_Object
5039 coding_inherit_eol_type (coding_system, parent)
5040      Lisp_Object coding_system, parent;
5041 {
5042   Lisp_Object spec, eol_type;
5043
5044   if (NILP (coding_system))
5045     coding_system = Qraw_text;
5046   spec = CODING_SYSTEM_SPEC (coding_system);
5047   eol_type = AREF (spec, 2);
5048   if (VECTORP (eol_type)
5049       && ! NILP (parent))
5050     {
5051       Lisp_Object parent_spec;
5052       Lisp_Object parent_eol_type;
5053
5054       parent_spec
5055         = CODING_SYSTEM_SPEC (buffer_defaults.buffer_file_coding_system);
5056       parent_eol_type = AREF (parent_spec, 2);
5057       if (EQ (parent_eol_type, Qunix))
5058         coding_system = AREF (eol_type, 0);
5059       else if (EQ (parent_eol_type, Qdos))
5060         coding_system = AREF (eol_type, 1);
5061       else if (EQ (parent_eol_type, Qmac))
5062         coding_system = AREF (eol_type, 2);
5063     }
5064   return coding_system;
5065 }
5066
5067 /* Emacs has a mechanism to automatically detect a coding system if it
5068    is one of Emacs' internal format, ISO2022, SJIS, and BIG5.  But,
5069    it's impossible to distinguish some coding systems accurately
5070    because they use the same range of codes.  So, at first, coding
5071    systems are categorized into 7, those are:
5072
5073    o coding-category-emacs-mule
5074
5075         The category for a coding system which has the same code range
5076         as Emacs' internal format.  Assigned the coding-system (Lisp
5077         symbol) `emacs-mule' by default.
5078
5079    o coding-category-sjis
5080
5081         The category for a coding system which has the same code range
5082         as SJIS.  Assigned the coding-system (Lisp
5083         symbol) `japanese-shift-jis' by default.
5084
5085    o coding-category-iso-7
5086
5087         The category for a coding system which has the same code range
5088         as ISO2022 of 7-bit environment.  This doesn't use any locking
5089         shift and single shift functions.  This can encode/decode all
5090         charsets.  Assigned the coding-system (Lisp symbol)
5091         `iso-2022-7bit' by default.
5092
5093    o coding-category-iso-7-tight
5094
5095         Same as coding-category-iso-7 except that this can
5096         encode/decode only the specified charsets.
5097
5098    o coding-category-iso-8-1
5099
5100         The category for a coding system which has the same code range
5101         as ISO2022 of 8-bit environment and graphic plane 1 used only
5102         for DIMENSION1 charset.  This doesn't use any locking shift
5103         and single shift functions.  Assigned the coding-system (Lisp
5104         symbol) `iso-latin-1' by default.
5105
5106    o coding-category-iso-8-2
5107
5108         The category for a coding system which has the same code range
5109         as ISO2022 of 8-bit environment and graphic plane 1 used only
5110         for DIMENSION2 charset.  This doesn't use any locking shift
5111         and single shift functions.  Assigned the coding-system (Lisp
5112         symbol) `japanese-iso-8bit' by default.
5113
5114    o coding-category-iso-7-else
5115
5116         The category for a coding system which has the same code range
5117         as ISO2022 of 7-bit environemnt but uses locking shift or
5118         single shift functions.  Assigned the coding-system (Lisp
5119         symbol) `iso-2022-7bit-lock' by default.
5120
5121    o coding-category-iso-8-else
5122
5123         The category for a coding system which has the same code range
5124         as ISO2022 of 8-bit environemnt but uses locking shift or
5125         single shift functions.  Assigned the coding-system (Lisp
5126         symbol) `iso-2022-8bit-ss2' by default.
5127
5128    o coding-category-big5
5129
5130         The category for a coding system which has the same code range
5131         as BIG5.  Assigned the coding-system (Lisp symbol)
5132         `cn-big5' by default.
5133
5134    o coding-category-utf-8
5135
5136         The category for a coding system which has the same code range
5137         as UTF-8 (cf. RFC2279).  Assigned the coding-system (Lisp
5138         symbol) `utf-8' by default.
5139
5140    o coding-category-utf-16-be
5141
5142         The category for a coding system in which a text has an
5143         Unicode signature (cf. Unicode Standard) in the order of BIG
5144         endian at the head.  Assigned the coding-system (Lisp symbol)
5145         `utf-16-be' by default.
5146
5147    o coding-category-utf-16-le
5148
5149         The category for a coding system in which a text has an
5150         Unicode signature (cf. Unicode Standard) in the order of
5151         LITTLE endian at the head.  Assigned the coding-system (Lisp
5152         symbol) `utf-16-le' by default.
5153
5154    o coding-category-ccl
5155
5156         The category for a coding system of which encoder/decoder is
5157         written in CCL programs.  The default value is nil, i.e., no
5158         coding system is assigned.
5159
5160    o coding-category-binary
5161
5162         The category for a coding system not categorized in any of the
5163         above.  Assigned the coding-system (Lisp symbol)
5164         `no-conversion' by default.
5165
5166    Each of them is a Lisp symbol and the value is an actual
5167    `coding-system's (this is also a Lisp symbol) assigned by a user.
5168    What Emacs does actually is to detect a category of coding system.
5169    Then, it uses a `coding-system' assigned to it.  If Emacs can't
5170    decide only one possible category, it selects a category of the
5171    highest priority.  Priorities of categories are also specified by a
5172    user in a Lisp variable `coding-category-list'.
5173
5174 */
5175
5176 #define EOL_SEEN_NONE   0
5177 #define EOL_SEEN_LF     1
5178 #define EOL_SEEN_CR     2
5179 #define EOL_SEEN_CRLF   4
5180
5181 /* Detect how end-of-line of a text of length SRC_BYTES pointed by
5182    SOURCE is encoded.  If CATEGORY is one of
5183    coding_category_utf_16_XXXX, assume that CR and LF are encoded by
5184    two-byte, else they are encoded by one-byte.
5185
5186    Return one of EOL_SEEN_XXX.  */
5187
5188 #define MAX_EOL_CHECK_COUNT 3
5189
5190 static int
5191 detect_eol (source, src_bytes, category)
5192      unsigned char *source;
5193      EMACS_INT src_bytes;
5194      enum coding_category category;
5195 {
5196   unsigned char *src = source, *src_end = src + src_bytes;
5197   unsigned char c;
5198   int total  = 0;
5199   int eol_seen = EOL_SEEN_NONE;
5200
5201   if ((1 << category) & CATEGORY_MASK_UTF_16)
5202     {
5203       int msb, lsb;
5204
5205       msb = category == (coding_category_utf_16_le
5206                          | coding_category_utf_16_le_nosig);
5207       lsb = 1 - msb;
5208
5209       while (src + 1 < src_end)
5210         {
5211           c = src[lsb];
5212           if (src[msb] == 0 && (c == '\n' || c == '\r'))
5213             {
5214               int this_eol;
5215
5216               if (c == '\n')
5217                 this_eol = EOL_SEEN_LF;
5218               else if (src + 3 >= src_end
5219                        || src[msb + 2] != 0
5220                        || src[lsb + 2] != '\n')
5221                 this_eol = EOL_SEEN_CR;
5222               else
5223                 this_eol = EOL_SEEN_CRLF;
5224
5225               if (eol_seen == EOL_SEEN_NONE)
5226                 /* This is the first end-of-line.  */
5227                 eol_seen = this_eol;
5228               else if (eol_seen != this_eol)
5229                 {
5230                   /* The found type is different from what found before.  */
5231                   eol_seen = EOL_SEEN_LF;
5232                   break;
5233                 }
5234               if (++total == MAX_EOL_CHECK_COUNT)
5235                 break;
5236             }
5237           src += 2;
5238         }
5239     }
5240   else
5241     {
5242       while (src < src_end)
5243         {
5244           c = *src++;
5245           if (c == '\n' || c == '\r')
5246             {
5247               int this_eol;
5248
5249               if (c == '\n')
5250                 this_eol = EOL_SEEN_LF;
5251               else if (src >= src_end || *src != '\n')
5252                 this_eol = EOL_SEEN_CR;
5253               else
5254                 this_eol = EOL_SEEN_CRLF, src++;
5255
5256               if (eol_seen == EOL_SEEN_NONE)
5257                 /* This is the first end-of-line.  */
5258                 eol_seen = this_eol;
5259               else if (eol_seen != this_eol)
5260                 {
5261                   /* The found type is different from what found before.  */
5262                   eol_seen = EOL_SEEN_LF;
5263                   break;
5264                 }
5265               if (++total == MAX_EOL_CHECK_COUNT)
5266                 break;
5267             }
5268         }
5269     }
5270   return eol_seen;
5271 }
5272
5273
5274 static Lisp_Object
5275 adjust_coding_eol_type (coding, eol_seen)
5276      struct coding_system *coding;
5277      int eol_seen;
5278 {
5279   Lisp_Object eol_type;
5280
5281   eol_type = CODING_ID_EOL_TYPE (coding->id);
5282   if (eol_seen & EOL_SEEN_LF)
5283     {
5284       coding->id = CODING_SYSTEM_ID (AREF (eol_type, 0));
5285       eol_type = Qunix;
5286     }
5287   else if (eol_seen & EOL_SEEN_CRLF)
5288     {
5289       coding->id = CODING_SYSTEM_ID (AREF (eol_type, 1));
5290       eol_type = Qdos;
5291     }
5292   else if (eol_seen & EOL_SEEN_CR)
5293     {
5294       coding->id = CODING_SYSTEM_ID (AREF (eol_type, 2));
5295       eol_type = Qmac;
5296     }
5297   return eol_type;
5298 }
5299
5300 /* Detect how a text specified in CODING is encoded.  If a coding
5301    system is detected, update fields of CODING by the detected coding
5302    system.  */
5303
5304 void
5305 detect_coding (coding)
5306      struct coding_system *coding;
5307 {
5308   const unsigned char *src, *src_end;
5309   Lisp_Object attrs, coding_type;
5310
5311   coding->consumed = coding->consumed_char = 0;
5312   coding->produced = coding->produced_char = 0;
5313   coding_set_source (coding);
5314
5315   src_end = coding->source + coding->src_bytes;
5316
5317   /* If we have not yet decided the text encoding type, detect it
5318      now.  */
5319   if (EQ (CODING_ATTR_TYPE (CODING_ID_ATTRS (coding->id)), Qundecided))
5320     {
5321       int c, i;
5322
5323       for (i = 0, src = coding->source; src < src_end; i++, src++)
5324         {
5325           c = *src;
5326           if (c & 0x80 || (c < 0x20 && (c == 0
5327                                         || c == ISO_CODE_ESC
5328                                         || c == ISO_CODE_SI
5329                                         || c == ISO_CODE_SO)))
5330             break;
5331         }
5332       coding->head_ascii = src - (coding->source + coding->consumed);
5333
5334       if (coding->head_ascii < coding->src_bytes)
5335         {
5336           struct coding_detection_info detect_info;
5337           enum coding_category category;
5338           struct coding_system *this;
5339
5340           detect_info.checked = detect_info.found = detect_info.rejected = 0;
5341           for (i = 0; i < coding_category_raw_text; i++)
5342             {
5343               category = coding_priorities[i];
5344               this = coding_categories + category;
5345               if (this->id < 0)
5346                 {
5347                   /* No coding system of this category is defined.  */
5348                   detect_info.rejected |= (1 << category);
5349                 }
5350               else if (category >= coding_category_raw_text)
5351                 continue;
5352               else if (detect_info.checked & (1 << category))
5353                 {
5354                   if (detect_info.found & (1 << category))
5355                     break;
5356                 }
5357               else if ((*(this->detector)) (coding, &detect_info)
5358                        && detect_info.found & (1 << category))
5359                 {
5360                   if (category == coding_category_utf_16_auto)
5361                     {
5362                       if (detect_info.found & CATEGORY_MASK_UTF_16_LE)
5363                         category = coding_category_utf_16_le;
5364                       else
5365                         category = coding_category_utf_16_be;
5366                     }
5367                   break;
5368                 }
5369             }
5370           if (i < coding_category_raw_text)
5371             setup_coding_system (CODING_ID_NAME (this->id), coding);
5372           else if (detect_info.rejected == CATEGORY_MASK_ANY)
5373             setup_coding_system (Qraw_text, coding);
5374           else if (detect_info.rejected)
5375             for (i = 0; i < coding_category_raw_text; i++)
5376               if (! (detect_info.rejected & (1 << coding_priorities[i])))
5377                 {
5378                   this = coding_categories + coding_priorities[i];
5379                   setup_coding_system (CODING_ID_NAME (this->id), coding);
5380                   break;
5381                 }
5382         }
5383     }
5384   else if (XINT (CODING_ATTR_CATEGORY (CODING_ID_ATTRS (coding->id)))
5385            == coding_category_utf_16_auto)
5386     {
5387       Lisp_Object coding_systems;
5388       struct coding_detection_info detect_info;
5389
5390       coding_systems
5391         = AREF (CODING_ID_ATTRS (coding->id), coding_attr_utf_16_bom);
5392       detect_info.found = detect_info.rejected = 0;
5393       if (CONSP (coding_systems)
5394           && detect_coding_utf_16 (coding, &detect_info))
5395         {
5396           if (detect_info.found & CATEGORY_MASK_UTF_16_LE)
5397             setup_coding_system (XCAR (coding_systems), coding);
5398           else if (detect_info.found & CATEGORY_MASK_UTF_16_BE)
5399             setup_coding_system (XCDR (coding_systems), coding);
5400         }
5401     }
5402 }
5403
5404
5405 static void
5406 decode_eol (coding)
5407      struct coding_system *coding;
5408 {
5409   Lisp_Object eol_type;
5410   unsigned char *p, *pbeg, *pend;
5411
5412   eol_type = CODING_ID_EOL_TYPE (coding->id);
5413   if (EQ (eol_type, Qunix))
5414     return;
5415
5416   if (NILP (coding->dst_object))
5417     pbeg = coding->destination;
5418   else
5419     pbeg = BYTE_POS_ADDR (coding->dst_pos_byte);
5420   pend = pbeg + coding->produced;
5421
5422   if (VECTORP (eol_type))
5423     {
5424       int eol_seen = EOL_SEEN_NONE;
5425
5426       for (p = pbeg; p < pend; p++)
5427         {
5428           if (*p == '\n')
5429             eol_seen |= EOL_SEEN_LF;
5430           else if (*p == '\r')
5431             {
5432               if (p + 1 < pend && *(p + 1) == '\n')
5433                 {
5434                   eol_seen |= EOL_SEEN_CRLF;
5435                   p++;
5436                 }
5437               else
5438                 eol_seen |= EOL_SEEN_CR;
5439             }
5440         }
5441       if (eol_seen != EOL_SEEN_NONE
5442           && eol_seen != EOL_SEEN_LF
5443           && eol_seen != EOL_SEEN_CRLF
5444           && eol_seen != EOL_SEEN_CR)
5445         eol_seen = EOL_SEEN_LF;
5446       if (eol_seen != EOL_SEEN_NONE)
5447         eol_type = adjust_coding_eol_type (coding, eol_seen);
5448     }
5449
5450   if (EQ (eol_type, Qmac))
5451     {
5452       for (p = pbeg; p < pend; p++)
5453         if (*p == '\r')
5454           *p = '\n';
5455     }
5456   else if (EQ (eol_type, Qdos))
5457     {
5458       int n = 0;
5459
5460       if (NILP (coding->dst_object))
5461         {
5462           for (p = pend - 2; p >= pbeg; p--)
5463             if (*p == '\r')
5464               {
5465                 safe_bcopy ((char *) (p + 1), (char *) p, pend-- - p - 1);
5466                 n++;
5467               }
5468         }
5469       else
5470         {
5471           for (p = pend - 2; p >= pbeg; p--)
5472             if (*p == '\r')
5473               {
5474                 int pos_byte = coding->dst_pos_byte + (p - pbeg);
5475                 int pos = BYTE_TO_CHAR (pos_byte);
5476
5477                 del_range_2 (pos, pos_byte, pos + 1, pos_byte + 1, 0);
5478                 n++;
5479               }
5480         }
5481       coding->produced -= n;
5482       coding->produced_char -= n;
5483     }
5484 }
5485
5486
5487 /* Return a translation table from coding system attribute vector ATTRS
5488    for encoding (ENCODEP is nonzero) or decoding (ENCODEP is zeor). */
5489
5490 static INLINE
5491 get_translation_table (attrs, encodep)
5492 {
5493   Lisp_Object standard, translation_table;
5494
5495   if (encodep)
5496     translation_table = CODING_ATTR_ENCODE_TBL (attrs),
5497       standard = Vstandard_translation_table_for_encode;
5498   else
5499     translation_table = CODING_ATTR_DECODE_TBL (attrs),
5500       standard = Vstandard_translation_table_for_decode;
5501   if (! NILP (translation_table) && SYMBOLP (translation_table))
5502     translation_table = Fget (translation_table, Qtranslation_table);
5503   if (NILP (translation_table))
5504     translation_table = standard;
5505   if (! CHAR_TABLE_P (translation_table))
5506     translation_table = Qnil;
5507   return translation_table;
5508 }
5509
5510
5511 static void
5512 translate_chars (coding, table)
5513      struct coding_system *coding;
5514      Lisp_Object table;
5515 {
5516   int *charbuf = coding->charbuf;
5517   int *charbuf_end = charbuf + coding->charbuf_used;
5518   int c;
5519
5520   if (coding->chars_at_source)
5521     return;
5522
5523   while (charbuf < charbuf_end)
5524     {
5525       c = *charbuf;
5526       if (c < 0)
5527         charbuf += -c;
5528       else
5529         *charbuf++ = translate_char (table, c);
5530     }
5531 }
5532
5533 static int
5534 produce_chars (coding)
5535      struct coding_system *coding;
5536 {
5537   unsigned char *dst = coding->destination + coding->produced;
5538   unsigned char *dst_end = coding->destination + coding->dst_bytes;
5539   int produced;
5540   int produced_chars = 0;
5541
5542   if (! coding->chars_at_source)
5543     {
5544       /* Characters are in coding->charbuf.  */
5545       int *buf = coding->charbuf;
5546       int *buf_end = buf + coding->charbuf_used;
5547       unsigned char *adjusted_dst_end;
5548
5549       if (BUFFERP (coding->src_object)
5550           && EQ (coding->src_object, coding->dst_object))
5551         dst_end = ((unsigned char *) coding->source) + coding->consumed;
5552       adjusted_dst_end = dst_end - MAX_MULTIBYTE_LENGTH;
5553
5554       while (buf < buf_end)
5555         {
5556           int c = *buf++;
5557
5558           if (dst >= adjusted_dst_end)
5559             {
5560               dst = alloc_destination (coding,
5561                                        buf_end - buf + MAX_MULTIBYTE_LENGTH,
5562                                        dst);
5563               dst_end = coding->destination + coding->dst_bytes;
5564               adjusted_dst_end = dst_end - MAX_MULTIBYTE_LENGTH;
5565             }
5566           if (c >= 0)
5567             {
5568               if (coding->dst_multibyte
5569                   || ! CHAR_BYTE8_P (c))
5570                 CHAR_STRING_ADVANCE (c, dst);
5571               else
5572                 *dst++ = CHAR_TO_BYTE8 (c);
5573               produced_chars++;
5574             }
5575           else
5576             /* This is an annotation datum.  (-C) is the length of
5577                it.  */
5578             buf += -c - 1;
5579         }
5580     }
5581   else
5582     {
5583       const unsigned char *src = coding->source;
5584       const unsigned char *src_end = src + coding->src_bytes;
5585       Lisp_Object eol_type;
5586
5587       eol_type = CODING_ID_EOL_TYPE (coding->id);
5588
5589       if (coding->src_multibyte != coding->dst_multibyte)
5590         {
5591           if (coding->src_multibyte)
5592             {
5593               int multibytep = 1;
5594               int consumed_chars;
5595
5596               while (1)
5597                 {
5598                   const unsigned char *src_base = src;
5599                   int c;
5600
5601                   ONE_MORE_BYTE (c);
5602                   if (c == '\r')
5603                     {
5604                       if (EQ (eol_type, Qdos))
5605                         {
5606                           if (src == src_end)
5607                             {
5608                               record_conversion_result
5609                                 (coding, CODING_RESULT_INSUFFICIENT_SRC);
5610                               goto no_more_source;
5611                             }
5612                           if (*src == '\n')
5613                             c = *src++;
5614                         }
5615                       else if (EQ (eol_type, Qmac))
5616                         c = '\n';
5617                     }
5618                   if (dst == dst_end)
5619                     {
5620                       coding->consumed = src - coding->source;
5621
5622                     if (EQ (coding->src_object, coding->dst_object))
5623                       dst_end = (unsigned char *) src;
5624                     if (dst == dst_end)
5625                       {
5626                         dst = alloc_destination (coding, src_end - src + 1,
5627                                                  dst);
5628                         dst_end = coding->destination + coding->dst_bytes;
5629                         coding_set_source (coding);
5630                         src = coding->source + coding->consumed;
5631                         src_end = coding->source + coding->src_bytes;
5632                       }
5633                     }
5634                   *dst++ = c;
5635                   produced_chars++;
5636                 }
5637             no_more_source:
5638               ;
5639             }
5640           else
5641             while (src < src_end)
5642               {
5643                 int multibytep = 1;
5644                 int c = *src++;
5645
5646                 if (c == '\r')
5647                   {
5648                     if (EQ (eol_type, Qdos))
5649                       {
5650                         if (src < src_end
5651                             && *src == '\n')
5652                           c = *src++;
5653                       }
5654                     else if (EQ (eol_type, Qmac))
5655                       c = '\n';
5656                   }
5657                 if (dst >= dst_end - 1)
5658                   {
5659                     coding->consumed = src - coding->source;
5660
5661                     if (EQ (coding->src_object, coding->dst_object))
5662                       dst_end = (unsigned char *) src;
5663                     if (dst >= dst_end - 1)
5664                       {
5665                         dst = alloc_destination (coding, src_end - src + 2,
5666                                                  dst);
5667                         dst_end = coding->destination + coding->dst_bytes;
5668                         coding_set_source (coding);
5669                         src = coding->source + coding->consumed;
5670                         src_end = coding->source + coding->src_bytes;
5671                       }
5672                   }
5673                 EMIT_ONE_BYTE (c);
5674               }
5675         }
5676       else
5677         {
5678           if (!EQ (coding->src_object, coding->dst_object))
5679             {
5680               int require = coding->src_bytes - coding->dst_bytes;
5681
5682               if (require > 0)
5683                 {
5684                   EMACS_INT offset = src - coding->source;
5685
5686                   dst = alloc_destination (coding, require, dst);
5687                   coding_set_source (coding);
5688                   src = coding->source + offset;
5689                   src_end = coding->source + coding->src_bytes;
5690                 }
5691             }
5692           produced_chars = coding->src_chars;
5693           while (src < src_end)
5694             {
5695               int c = *src++;
5696
5697               if (c == '\r')
5698                 {
5699                   if (EQ (eol_type, Qdos))
5700                     {
5701                       if (src < src_end
5702                           && *src == '\n')
5703                         c = *src++;
5704                       produced_chars--;
5705                     }
5706                   else if (EQ (eol_type, Qmac))
5707                     c = '\n';
5708                 }
5709               *dst++ = c;
5710             }
5711         }
5712       coding->consumed = coding->src_bytes;
5713       coding->consumed_char = coding->src_chars;
5714     }
5715
5716   produced = dst - (coding->destination + coding->produced);
5717   if (BUFFERP (coding->dst_object))
5718     insert_from_gap (produced_chars, produced);
5719   coding->produced += produced;
5720   coding->produced_char += produced_chars;
5721   return produced_chars;
5722 }
5723
5724 /* Compose text in CODING->object according to the annotation data at
5725    CHARBUF.  CHARBUF is an array:
5726      [ -LENGTH ANNOTATION_MASK FROM TO METHOD COMP_LEN [ COMPONENTS... ] ]
5727  */
5728
5729 static INLINE void
5730 produce_composition (coding, charbuf)
5731      struct coding_system *coding;
5732      int *charbuf;
5733 {
5734   int len;
5735   EMACS_INT from, to;
5736   enum composition_method method;
5737   Lisp_Object components;
5738
5739   len = -charbuf[0];
5740   from = coding->dst_pos + charbuf[2];
5741   to = coding->dst_pos + charbuf[3];
5742   method = (enum composition_method) (charbuf[4]);
5743
5744   if (method == COMPOSITION_RELATIVE)
5745     components = Qnil;
5746   else
5747     {
5748       Lisp_Object args[MAX_COMPOSITION_COMPONENTS * 2 - 1];
5749       int i;
5750
5751       len -= 5;
5752       charbuf += 5;
5753       for (i = 0; i < len; i++)
5754         args[i] = make_number (charbuf[i]);
5755       components = (method == COMPOSITION_WITH_ALTCHARS
5756                     ? Fstring (len, args) : Fvector (len, args));
5757     }
5758   compose_text (from, to, components, Qnil, coding->dst_object);
5759 }
5760
5761
5762 /* Put `charset' property on text in CODING->object according to
5763    the annotation data at CHARBUF.  CHARBUF is an array:
5764      [ -LENGTH ANNOTATION_MASK FROM TO CHARSET-ID ]
5765  */
5766
5767 static INLINE void
5768 produce_charset (coding, charbuf)
5769      struct coding_system *coding;
5770      int *charbuf;
5771 {
5772   EMACS_INT from = coding->dst_pos + charbuf[2];
5773   EMACS_INT to = coding->dst_pos + charbuf[3];
5774   struct charset *charset = CHARSET_FROM_ID (charbuf[4]);
5775
5776   Fput_text_property (make_number (from), make_number (to),
5777                       Qcharset, CHARSET_NAME (charset),
5778                       coding->dst_object);
5779 }
5780
5781
5782 #define CHARBUF_SIZE 0x4000
5783
5784 #define ALLOC_CONVERSION_WORK_AREA(coding)                              \
5785   do {                                                                  \
5786     int size = CHARBUF_SIZE;;                                           \
5787                                                                         \
5788     coding->charbuf = NULL;                                             \
5789     while (size > 1024)                                                 \
5790       {                                                                 \
5791         coding->charbuf = (int *) alloca (sizeof (int) * size);         \
5792         if (coding->charbuf)                                            \
5793           break;                                                        \
5794         size >>= 1;                                                     \
5795       }                                                                 \
5796     if (! coding->charbuf)                                              \
5797       {                                                                 \
5798         record_conversion_result (coding, CODING_RESULT_INSUFFICIENT_MEM); \
5799         return coding->result;                                          \
5800       }                                                                 \
5801     coding->charbuf_size = size;                                        \
5802   } while (0)
5803
5804
5805 static void
5806 produce_annotation (coding)
5807      struct coding_system *coding;
5808 {
5809   int *charbuf = coding->charbuf;
5810   int *charbuf_end = charbuf + coding->charbuf_used;
5811
5812   if (NILP (coding->dst_object))
5813     return;
5814
5815   while (charbuf < charbuf_end)
5816     {
5817       if (*charbuf >= 0)
5818         charbuf++;
5819       else
5820         {
5821           int len = -*charbuf;
5822           switch (charbuf[1])
5823             {
5824             case CODING_ANNOTATE_COMPOSITION_MASK:
5825               produce_composition (coding, charbuf);
5826               break;
5827             case CODING_ANNOTATE_CHARSET_MASK:
5828               produce_charset (coding, charbuf);
5829               break;
5830             default:
5831               abort ();
5832             }
5833           charbuf += len;
5834         }
5835     }
5836 }
5837
5838 /* Decode the data at CODING->src_object into CODING->dst_object.
5839    CODING->src_object is a buffer, a string, or nil.
5840    CODING->dst_object is a buffer.
5841
5842    If CODING->src_object is a buffer, it must be the current buffer.
5843    In this case, if CODING->src_pos is positive, it is a position of
5844    the source text in the buffer, otherwise, the source text is in the
5845    gap area of the buffer, and CODING->src_pos specifies the offset of
5846    the text from GPT (which must be the same as PT).  If this is the
5847    same buffer as CODING->dst_object, CODING->src_pos must be
5848    negative.
5849
5850    If CODING->src_object is a string, CODING->src_pos in an index to
5851    that string.
5852
5853    If CODING->src_object is nil, CODING->source must already point to
5854    the non-relocatable memory area.  In this case, CODING->src_pos is
5855    an offset from CODING->source.
5856
5857    The decoded data is inserted at the current point of the buffer
5858    CODING->dst_object.
5859 */
5860
5861 static int
5862 decode_coding (coding)
5863      struct coding_system *coding;
5864 {
5865   Lisp_Object attrs;
5866   Lisp_Object undo_list;
5867   Lisp_Object translation_table;
5868
5869   if (BUFFERP (coding->src_object)
5870       && coding->src_pos > 0
5871       && coding->src_pos < GPT
5872       && coding->src_pos + coding->src_chars > GPT)
5873     move_gap_both (coding->src_pos, coding->src_pos_byte);
5874
5875   undo_list = Qt;
5876   if (BUFFERP (coding->dst_object))
5877     {
5878       if (current_buffer != XBUFFER (coding->dst_object))
5879         set_buffer_internal (XBUFFER (coding->dst_object));
5880       if (GPT != PT)
5881         move_gap_both (PT, PT_BYTE);
5882       undo_list = current_buffer->undo_list;
5883       current_buffer->undo_list = Qt;
5884     }
5885
5886   coding->consumed = coding->consumed_char = 0;
5887   coding->produced = coding->produced_char = 0;
5888   coding->chars_at_source = 0;
5889   record_conversion_result (coding, CODING_RESULT_SUCCESS);
5890   coding->errors = 0;
5891
5892   ALLOC_CONVERSION_WORK_AREA (coding);
5893
5894   attrs = CODING_ID_ATTRS (coding->id);
5895   translation_table = get_translation_table (attrs, 1);
5896
5897   do
5898     {
5899       coding_set_source (coding);
5900       coding->annotated = 0;
5901       (*(coding->decoder)) (coding);
5902       if (!NILP (translation_table))
5903         translate_chars (coding, translation_table);
5904       coding_set_destination (coding);
5905       produce_chars (coding);
5906       if (coding->annotated)
5907         produce_annotation (coding);
5908     }
5909   while (coding->consumed < coding->src_bytes
5910          && ! coding->result);
5911
5912   coding->carryover_bytes = 0;
5913   if (coding->consumed < coding->src_bytes)
5914     {
5915       int nbytes = coding->src_bytes - coding->consumed;
5916       const unsigned char *src;
5917
5918       coding_set_source (coding);
5919       coding_set_destination (coding);
5920       src = coding->source + coding->consumed;
5921
5922       if (coding->mode & CODING_MODE_LAST_BLOCK)
5923         {
5924           /* Flush out unprocessed data as binary chars.  We are sure
5925              that the number of data is less than the size of
5926              coding->charbuf.  */
5927           coding->charbuf_used = 0;
5928           while (nbytes-- > 0)
5929             {
5930               int c = *src++;
5931
5932               coding->charbuf[coding->charbuf_used++] = (c & 0x80 ? - c : c);
5933             }
5934           produce_chars (coding);
5935         }
5936       else
5937         {
5938           /* Record unprocessed bytes in coding->carryover.  We are
5939              sure that the number of data is less than the size of
5940              coding->carryover.  */
5941           unsigned char *p = coding->carryover;
5942
5943           coding->carryover_bytes = nbytes;
5944           while (nbytes-- > 0)
5945             *p++ = *src++;
5946         }
5947       coding->consumed = coding->src_bytes;
5948     }
5949
5950   if (BUFFERP (coding->dst_object))
5951     {
5952       current_buffer->undo_list = undo_list;
5953       record_insert (coding->dst_pos, coding->produced_char);
5954     }
5955   if (! EQ (CODING_ID_EOL_TYPE (coding->id), Qunix))
5956     decode_eol (coding);
5957   return coding->result;
5958 }
5959
5960
5961 /* Extract an annotation datum from a composition starting at POS and
5962    ending before LIMIT of CODING->src_object (buffer or string), store
5963    the data in BUF, set *STOP to a starting position of the next
5964    composition (if any) or to LIMIT, and return the address of the
5965    next element of BUF.
5966
5967    If such an annotation is not found, set *STOP to a starting
5968    position of a composition after POS (if any) or to LIMIT, and
5969    return BUF.  */
5970
5971 static INLINE int *
5972 handle_composition_annotation (pos, limit, coding, buf, stop)
5973      EMACS_INT pos, limit;
5974      struct coding_system *coding;
5975      int *buf;
5976      EMACS_INT *stop;
5977 {
5978   EMACS_INT start, end;
5979   Lisp_Object prop;
5980
5981   if (! find_composition (pos, limit, &start, &end, &prop, coding->src_object)
5982       || end > limit)
5983     *stop = limit;
5984   else if (start > pos)
5985     *stop = start;
5986   else
5987     {
5988       if (start == pos)
5989         {
5990           /* We found a composition.  Store the corresponding
5991              annotation data in BUF.  */
5992           int *head = buf;
5993           enum composition_method method = COMPOSITION_METHOD (prop);
5994           int nchars = COMPOSITION_LENGTH (prop);
5995
5996           ADD_COMPOSITION_DATA (buf, 0, nchars, method);
5997           if (method != COMPOSITION_RELATIVE)
5998             {
5999               Lisp_Object components;
6000               int len, i, i_byte;
6001
6002               components = COMPOSITION_COMPONENTS (prop);
6003               if (VECTORP (components))
6004                 {
6005                   len = XVECTOR (components)->size;
6006                   for (i = 0; i < len; i++)
6007                     *buf++ = XINT (AREF (components, i));
6008                 }
6009               else if (STRINGP (components))
6010                 {
6011                   len = SCHARS (components);
6012                   i = i_byte = 0;
6013                   while (i < len)
6014                     {
6015                       FETCH_STRING_CHAR_ADVANCE (*buf, components, i, i_byte);
6016                       buf++;
6017                     }
6018                 }
6019               else if (INTEGERP (components))
6020                 {
6021                   len = 1;
6022                   *buf++ = XINT (components);
6023                 }
6024               else if (CONSP (components))
6025                 {
6026                   for (len = 0; CONSP (components);
6027                        len++, components = XCDR (components))
6028                     *buf++ = XINT (XCAR (components));
6029                 }
6030               else
6031                 abort ();
6032               *head -= len;
6033             }
6034         }
6035
6036       if (find_composition (end, limit, &start, &end, &prop,
6037                             coding->src_object)
6038           && end <= limit)
6039         *stop = start;
6040       else
6041         *stop = limit;
6042     }
6043   return buf;
6044 }
6045
6046
6047 /* Extract an annotation datum from a text property `charset' at POS of
6048    CODING->src_object (buffer of string), store the data in BUF, set
6049    *STOP to the position where the value of `charset' property changes
6050    (limiting by LIMIT), and return the address of the next element of
6051    BUF.
6052
6053    If the property value is nil, set *STOP to the position where the
6054    property value is non-nil (limiting by LIMIT), and return BUF.  */
6055
6056 static INLINE int *
6057 handle_charset_annotation (pos, limit, coding, buf, stop)
6058      EMACS_INT pos, limit;
6059      struct coding_system *coding;
6060      int *buf;
6061      EMACS_INT *stop;
6062 {
6063   Lisp_Object val, next;
6064   int id;
6065
6066   val = Fget_text_property (make_number (pos), Qcharset, coding->src_object);
6067   if (! NILP (val) && CHARSETP (val))
6068     id = XINT (CHARSET_SYMBOL_ID (val));
6069   else
6070     id = -1;
6071   ADD_CHARSET_DATA (buf, 0, 0, id);
6072   next = Fnext_single_property_change (make_number (pos), Qcharset,
6073                                        coding->src_object,
6074                                        make_number (limit));
6075   *stop = XINT (next);
6076   return buf;
6077 }
6078
6079
6080 static void
6081 consume_chars (coding)
6082      struct coding_system *coding;
6083 {
6084   int *buf = coding->charbuf;
6085   int *buf_end = coding->charbuf + coding->charbuf_size;
6086   const unsigned char *src = coding->source + coding->consumed;
6087   const unsigned char *src_end = coding->source + coding->src_bytes;
6088   EMACS_INT pos = coding->src_pos + coding->consumed_char;
6089   EMACS_INT end_pos = coding->src_pos + coding->src_chars;
6090   int multibytep = coding->src_multibyte;
6091   Lisp_Object eol_type;
6092   int c;
6093   EMACS_INT stop, stop_composition, stop_charset;
6094
6095   eol_type = CODING_ID_EOL_TYPE (coding->id);
6096   if (VECTORP (eol_type))
6097     eol_type = Qunix;
6098
6099   /* Note: composition handling is not yet implemented.  */
6100   coding->common_flags &= ~CODING_ANNOTATE_COMPOSITION_MASK;
6101
6102   if (NILP (coding->src_object))
6103     stop = stop_composition = stop_charset = end_pos;
6104   else
6105     {
6106       if (coding->common_flags & CODING_ANNOTATE_COMPOSITION_MASK)
6107         stop = stop_composition = pos;
6108       else
6109         stop = stop_composition = end_pos;
6110       if (coding->common_flags & CODING_ANNOTATE_CHARSET_MASK)
6111         stop = stop_charset = pos;
6112       else
6113         stop_charset = end_pos;
6114     }
6115
6116   /* Compensate for CRLF and conversion.  */
6117   buf_end -= 1 + MAX_ANNOTATION_LENGTH;
6118   while (buf < buf_end)
6119     {
6120       if (pos == stop)
6121         {
6122           if (pos == end_pos)
6123             break;
6124           if (pos == stop_composition)
6125             buf = handle_composition_annotation (pos, end_pos, coding,
6126                                                  buf, &stop_composition);
6127           if (pos == stop_charset)
6128             buf = handle_charset_annotation (pos, end_pos, coding,
6129                                              buf, &stop_charset);
6130           stop = (stop_composition < stop_charset
6131                   ? stop_composition : stop_charset);
6132         }
6133
6134       if (! multibytep)
6135         {
6136           EMACS_INT bytes;
6137
6138           if (! CODING_FOR_UNIBYTE (coding)
6139               && (bytes = MULTIBYTE_LENGTH (src, src_end)) > 0)
6140             c = STRING_CHAR_ADVANCE (src), pos += bytes;
6141           else
6142             c = *src++, pos++;
6143         }
6144       else
6145         c = STRING_CHAR_ADVANCE (src), pos++;
6146       if ((c == '\r') && (coding->mode & CODING_MODE_SELECTIVE_DISPLAY))
6147         c = '\n';
6148       if (! EQ (eol_type, Qunix))
6149         {
6150           if (c == '\n')
6151             {
6152               if (EQ (eol_type, Qdos))
6153                 *buf++ = '\r';
6154               else
6155                 c = '\r';
6156             }
6157         }
6158       *buf++ = c;
6159     }
6160
6161   coding->consumed = src - coding->source;
6162   coding->consumed_char = pos - coding->src_pos;
6163   coding->charbuf_used = buf - coding->charbuf;
6164   coding->chars_at_source = 0;
6165 }
6166
6167
6168 /* Encode the text at CODING->src_object into CODING->dst_object.
6169    CODING->src_object is a buffer or a string.
6170    CODING->dst_object is a buffer or nil.
6171
6172    If CODING->src_object is a buffer, it must be the current buffer.
6173    In this case, if CODING->src_pos is positive, it is a position of
6174    the source text in the buffer, otherwise. the source text is in the
6175    gap area of the buffer, and coding->src_pos specifies the offset of
6176    the text from GPT (which must be the same as PT).  If this is the
6177    same buffer as CODING->dst_object, CODING->src_pos must be
6178    negative and CODING should not have `pre-write-conversion'.
6179
6180    If CODING->src_object is a string, CODING should not have
6181    `pre-write-conversion'.
6182
6183    If CODING->dst_object is a buffer, the encoded data is inserted at
6184    the current point of that buffer.
6185
6186    If CODING->dst_object is nil, the encoded data is placed at the
6187    memory area specified by CODING->destination.  */
6188
6189 static int
6190 encode_coding (coding)
6191      struct coding_system *coding;
6192 {
6193   Lisp_Object attrs;
6194   Lisp_Object translation_table;
6195
6196   attrs = CODING_ID_ATTRS (coding->id);
6197   translation_table = get_translation_table (attrs, 1);
6198
6199   if (BUFFERP (coding->dst_object))
6200     {
6201       set_buffer_internal (XBUFFER (coding->dst_object));
6202       coding->dst_multibyte
6203         = ! NILP (current_buffer->enable_multibyte_characters);
6204     }
6205
6206   coding->consumed = coding->consumed_char = 0;
6207   coding->produced = coding->produced_char = 0;
6208   record_conversion_result (coding, CODING_RESULT_SUCCESS);
6209   coding->errors = 0;
6210
6211   ALLOC_CONVERSION_WORK_AREA (coding);
6212
6213   do {
6214     coding_set_source (coding);
6215     consume_chars (coding);
6216
6217     if (!NILP (translation_table))
6218       translate_chars (coding, translation_table);
6219
6220     coding_set_destination (coding);
6221     (*(coding->encoder)) (coding);
6222   } while (coding->consumed_char < coding->src_chars);
6223
6224   if (BUFFERP (coding->dst_object))
6225     insert_from_gap (coding->produced_char, coding->produced);
6226
6227   return (coding->result);
6228 }
6229
6230
6231 /* Name (or base name) of work buffer for code conversion.  */
6232 static Lisp_Object Vcode_conversion_workbuf_name;
6233
6234 /* A working buffer used by the top level conversion.  Once it is
6235    created, it is never destroyed.  It has the name
6236    Vcode_conversion_workbuf_name.  The other working buffers are
6237    destroyed after the use is finished, and their names are modified
6238    versions of Vcode_conversion_workbuf_name.  */
6239 static Lisp_Object Vcode_conversion_reused_workbuf;
6240
6241 /* 1 iff Vcode_conversion_reused_workbuf is already in use.  */
6242 static int reused_workbuf_in_use;
6243
6244
6245 /* Return a working buffer of code convesion.  MULTIBYTE specifies the
6246    multibyteness of returning buffer.  */
6247
6248 Lisp_Object
6249 make_conversion_work_buffer (multibyte)
6250 {
6251   Lisp_Object name, workbuf;
6252   struct buffer *current;
6253
6254   if (reused_workbuf_in_use++)
6255     {
6256       name = Fgenerate_new_buffer_name (Vcode_conversion_workbuf_name, Qnil);
6257       workbuf = Fget_buffer_create (name);
6258     }
6259   else
6260     {
6261       name = Vcode_conversion_workbuf_name;
6262       workbuf = Fget_buffer_create (name);
6263       if (NILP (Vcode_conversion_reused_workbuf))
6264         Vcode_conversion_reused_workbuf = workbuf;
6265     }
6266   current = current_buffer;
6267   set_buffer_internal (XBUFFER (workbuf));
6268   Ferase_buffer ();
6269   current_buffer->undo_list = Qt;
6270   current_buffer->enable_multibyte_characters = multibyte ? Qt : Qnil;
6271   set_buffer_internal (current);
6272   return workbuf;
6273 }
6274
6275
6276 static Lisp_Object
6277 code_conversion_restore (arg)
6278      Lisp_Object arg;
6279 {
6280   Lisp_Object current, workbuf;
6281
6282   current = XCAR (arg);
6283   workbuf = XCDR (arg);
6284   if (! NILP (workbuf))
6285     {
6286       if (EQ (workbuf, Vcode_conversion_reused_workbuf))
6287         reused_workbuf_in_use = 0;
6288       else if (! NILP (Fbuffer_live_p (workbuf)))
6289         Fkill_buffer (workbuf);
6290     }
6291   set_buffer_internal (XBUFFER (current));
6292   return Qnil;
6293 }
6294
6295 Lisp_Object
6296 code_conversion_save (with_work_buf, multibyte)
6297      int with_work_buf, multibyte;
6298 {
6299   Lisp_Object workbuf = Qnil;
6300
6301   if (with_work_buf)
6302     workbuf = make_conversion_work_buffer (multibyte);
6303   record_unwind_protect (code_conversion_restore,
6304                          Fcons (Fcurrent_buffer (), workbuf));
6305   return workbuf;
6306 }
6307
6308 int
6309 decode_coding_gap (coding, chars, bytes)
6310      struct coding_system *coding;
6311      EMACS_INT chars, bytes;
6312 {
6313   int count = specpdl_ptr - specpdl;
6314   Lisp_Object attrs;
6315
6316   code_conversion_save (0, 0);
6317
6318   coding->src_object = Fcurrent_buffer ();
6319   coding->src_chars = chars;
6320   coding->src_bytes = bytes;
6321   coding->src_pos = -chars;
6322   coding->src_pos_byte = -bytes;
6323   coding->src_multibyte = chars < bytes;
6324   coding->dst_object = coding->src_object;
6325   coding->dst_pos = PT;
6326   coding->dst_pos_byte = PT_BYTE;
6327   coding->dst_multibyte = ! NILP (current_buffer->enable_multibyte_characters);
6328   coding->mode |= CODING_MODE_LAST_BLOCK;
6329
6330   if (CODING_REQUIRE_DETECTION (coding))
6331     detect_coding (coding);
6332
6333   decode_coding (coding);
6334
6335   attrs = CODING_ID_ATTRS (coding->id);
6336   if (! NILP (CODING_ATTR_POST_READ (attrs)))
6337     {
6338       EMACS_INT prev_Z = Z, prev_Z_BYTE = Z_BYTE;
6339       Lisp_Object val;
6340
6341       TEMP_SET_PT_BOTH (coding->dst_pos, coding->dst_pos_byte);
6342       val = call1 (CODING_ATTR_POST_READ (attrs),
6343                    make_number (coding->produced_char));
6344       CHECK_NATNUM (val);
6345       coding->produced_char += Z - prev_Z;
6346       coding->produced += Z_BYTE - prev_Z_BYTE;
6347     }
6348
6349   unbind_to (count, Qnil);
6350   return coding->result;
6351 }
6352
6353 int
6354 encode_coding_gap (coding, chars, bytes)
6355      struct coding_system *coding;
6356      EMACS_INT chars, bytes;
6357 {
6358   int count = specpdl_ptr - specpdl;
6359
6360   code_conversion_save (0, 0);
6361
6362   coding->src_object = Fcurrent_buffer ();
6363   coding->src_chars = chars;
6364   coding->src_bytes = bytes;
6365   coding->src_pos = -chars;
6366   coding->src_pos_byte = -bytes;
6367   coding->src_multibyte = chars < bytes;
6368   coding->dst_object = coding->src_object;
6369   coding->dst_pos = PT;
6370   coding->dst_pos_byte = PT_BYTE;
6371
6372   encode_coding (coding);
6373
6374   unbind_to (count, Qnil);
6375   return coding->result;
6376 }
6377
6378
6379 /* Decode the text in the range FROM/FROM_BYTE and TO/TO_BYTE in
6380    SRC_OBJECT into DST_OBJECT by coding context CODING.
6381
6382    SRC_OBJECT is a buffer, a string, or Qnil.
6383
6384    If it is a buffer, the text is at point of the buffer.  FROM and TO
6385    are positions in the buffer.
6386
6387    If it is a string, the text is at the beginning of the string.
6388    FROM and TO are indices to the string.
6389
6390    If it is nil, the text is at coding->source.  FROM and TO are
6391    indices to coding->source.
6392
6393    DST_OBJECT is a buffer, Qt, or Qnil.
6394
6395    If it is a buffer, the decoded text is inserted at point of the
6396    buffer.  If the buffer is the same as SRC_OBJECT, the source text
6397    is deleted.
6398
6399    If it is Qt, a string is made from the decoded text, and
6400    set in CODING->dst_object.
6401
6402    If it is Qnil, the decoded text is stored at CODING->destination.
6403    The caller must allocate CODING->dst_bytes bytes at
6404    CODING->destination by xmalloc.  If the decoded text is longer than
6405    CODING->dst_bytes, CODING->destination is relocated by xrealloc.
6406  */
6407
6408 void
6409 decode_coding_object (coding, src_object, from, from_byte, to, to_byte,
6410                       dst_object)
6411      struct coding_system *coding;
6412      Lisp_Object src_object;
6413      EMACS_INT from, from_byte, to, to_byte;
6414      Lisp_Object dst_object;
6415 {
6416   int count = specpdl_ptr - specpdl;
6417   unsigned char *destination;
6418   EMACS_INT dst_bytes;
6419   EMACS_INT chars = to - from;
6420   EMACS_INT bytes = to_byte - from_byte;
6421   Lisp_Object attrs;
6422   Lisp_Object buffer;
6423   int saved_pt = -1, saved_pt_byte;
6424
6425   buffer = Fcurrent_buffer ();
6426
6427   if (NILP (dst_object))
6428     {
6429       destination = coding->destination;
6430       dst_bytes = coding->dst_bytes;
6431     }
6432
6433   coding->src_object = src_object;
6434   coding->src_chars = chars;
6435   coding->src_bytes = bytes;
6436   coding->src_multibyte = chars < bytes;
6437
6438   if (STRINGP (src_object))
6439     {
6440       coding->src_pos = from;
6441       coding->src_pos_byte = from_byte;
6442     }
6443   else if (BUFFERP (src_object))
6444     {
6445       set_buffer_internal (XBUFFER (src_object));
6446       if (from != GPT)
6447         move_gap_both (from, from_byte);
6448       if (EQ (src_object, dst_object))
6449         {
6450           saved_pt = PT, saved_pt_byte = PT_BYTE;
6451           TEMP_SET_PT_BOTH (from, from_byte);
6452           del_range_both (from, from_byte, to, to_byte, 1);
6453           coding->src_pos = -chars;
6454           coding->src_pos_byte = -bytes;
6455         }
6456       else
6457         {
6458           coding->src_pos = from;
6459           coding->src_pos_byte = from_byte;
6460         }
6461     }
6462
6463   if (CODING_REQUIRE_DETECTION (coding))
6464     detect_coding (coding);
6465   attrs = CODING_ID_ATTRS (coding->id);
6466
6467   if (EQ (dst_object, Qt)
6468       || (! NILP (CODING_ATTR_POST_READ (attrs))
6469           && NILP (dst_object)))
6470     {
6471       coding->dst_object = code_conversion_save (1, 1);
6472       coding->dst_pos = BEG;
6473       coding->dst_pos_byte = BEG_BYTE;
6474       coding->dst_multibyte = 1;
6475     }
6476   else if (BUFFERP (dst_object))
6477     {
6478       code_conversion_save (0, 0);
6479       coding->dst_object = dst_object;
6480       coding->dst_pos = BUF_PT (XBUFFER (dst_object));
6481       coding->dst_pos_byte = BUF_PT_BYTE (XBUFFER (dst_object));
6482       coding->dst_multibyte
6483         = ! NILP (XBUFFER (dst_object)->enable_multibyte_characters);
6484     }
6485   else
6486     {
6487       code_conversion_save (0, 0);
6488       coding->dst_object = Qnil;
6489       coding->dst_multibyte = 1;
6490     }
6491
6492   decode_coding (coding);
6493
6494   if (BUFFERP (coding->dst_object))
6495     set_buffer_internal (XBUFFER (coding->dst_object));
6496
6497   if (! NILP (CODING_ATTR_POST_READ (attrs)))
6498     {
6499       struct gcpro gcpro1, gcpro2;
6500       EMACS_INT prev_Z = Z, prev_Z_BYTE = Z_BYTE;
6501       Lisp_Object val;
6502
6503       TEMP_SET_PT_BOTH (coding->dst_pos, coding->dst_pos_byte);
6504       GCPRO2 (coding->src_object, coding->dst_object);
6505       val = call1 (CODING_ATTR_POST_READ (attrs),
6506                    make_number (coding->produced_char));
6507       UNGCPRO;
6508       CHECK_NATNUM (val);
6509       coding->produced_char += Z - prev_Z;
6510       coding->produced += Z_BYTE - prev_Z_BYTE;
6511     }
6512
6513   if (EQ (dst_object, Qt))
6514     {
6515       coding->dst_object = Fbuffer_string ();
6516     }
6517   else if (NILP (dst_object) && BUFFERP (coding->dst_object))
6518     {
6519       set_buffer_internal (XBUFFER (coding->dst_object));
6520       if (dst_bytes < coding->produced)
6521         {
6522           destination
6523             = (unsigned char *) xrealloc (destination, coding->produced);
6524           if (! destination)
6525             {
6526               record_conversion_result (coding,
6527                                         CODING_RESULT_INSUFFICIENT_DST);
6528               unbind_to (count, Qnil);
6529               return;
6530             }
6531           if (BEGV < GPT && GPT < BEGV + coding->produced_char)
6532             move_gap_both (BEGV, BEGV_BYTE);
6533           bcopy (BEGV_ADDR, destination, coding->produced);
6534           coding->destination = destination;
6535         }
6536     }
6537
6538   if (saved_pt >= 0)
6539     {
6540       /* This is the case of:
6541          (BUFFERP (src_object) && EQ (src_object, dst_object))
6542          As we have moved PT while replacing the original buffer
6543          contents, we must recover it now.  */
6544       set_buffer_internal (XBUFFER (src_object));
6545       if (saved_pt < from)
6546         TEMP_SET_PT_BOTH (saved_pt, saved_pt_byte);
6547       else if (saved_pt < from + chars)
6548         TEMP_SET_PT_BOTH (from, from_byte);
6549       else if (! NILP (current_buffer->enable_multibyte_characters))
6550         TEMP_SET_PT_BOTH (saved_pt + (coding->produced_char - chars),
6551                           saved_pt_byte + (coding->produced - bytes));
6552       else
6553         TEMP_SET_PT_BOTH (saved_pt + (coding->produced - bytes),
6554                           saved_pt_byte + (coding->produced - bytes));
6555     }
6556
6557   unbind_to (count, coding->dst_object);
6558 }
6559
6560
6561 void
6562 encode_coding_object (coding, src_object, from, from_byte, to, to_byte,
6563                       dst_object)
6564      struct coding_system *coding;
6565      Lisp_Object src_object;
6566      EMACS_INT from, from_byte, to, to_byte;
6567      Lisp_Object dst_object;
6568 {
6569   int count = specpdl_ptr - specpdl;
6570   EMACS_INT chars = to - from;
6571   EMACS_INT bytes = to_byte - from_byte;
6572   Lisp_Object attrs;
6573   Lisp_Object buffer;
6574   int saved_pt = -1, saved_pt_byte;
6575
6576   buffer = Fcurrent_buffer ();
6577
6578   coding->src_object = src_object;
6579   coding->src_chars = chars;
6580   coding->src_bytes = bytes;
6581   coding->src_multibyte = chars < bytes;
6582
6583   attrs = CODING_ID_ATTRS (coding->id);
6584
6585   if (! NILP (CODING_ATTR_PRE_WRITE (attrs)))
6586     {
6587       coding->src_object = code_conversion_save (1, coding->src_multibyte);
6588       set_buffer_internal (XBUFFER (coding->src_object));
6589       if (STRINGP (src_object))
6590         insert_from_string (src_object, from, from_byte, chars, bytes, 0);
6591       else if (BUFFERP (src_object))
6592         insert_from_buffer (XBUFFER (src_object), from, chars, 0);
6593       else
6594         insert_1_both (coding->source + from, chars, bytes, 0, 0, 0);
6595
6596       if (EQ (src_object, dst_object))
6597         {
6598           set_buffer_internal (XBUFFER (src_object));
6599           saved_pt = PT, saved_pt_byte = PT_BYTE;
6600           del_range_both (from, from_byte, to, to_byte, 1);
6601           set_buffer_internal (XBUFFER (coding->src_object));
6602         }
6603
6604       call2 (CODING_ATTR_PRE_WRITE (attrs),
6605              make_number (BEG), make_number (Z));
6606       coding->src_object = Fcurrent_buffer ();
6607       if (BEG != GPT)
6608         move_gap_both (BEG, BEG_BYTE);
6609       coding->src_chars = Z - BEG;
6610       coding->src_bytes = Z_BYTE - BEG_BYTE;
6611       coding->src_pos = BEG;
6612       coding->src_pos_byte = BEG_BYTE;
6613       coding->src_multibyte = Z < Z_BYTE;
6614     }
6615   else if (STRINGP (src_object))
6616     {
6617       code_conversion_save (0, 0);
6618       coding->src_pos = from;
6619       coding->src_pos_byte = from_byte;
6620     }
6621   else if (BUFFERP (src_object))
6622     {
6623       code_conversion_save (0, 0);
6624       set_buffer_internal (XBUFFER (src_object));
6625       if (EQ (src_object, dst_object))
6626         {
6627           saved_pt = PT, saved_pt_byte = PT_BYTE;
6628           coding->src_object = del_range_1 (from, to, 1, 1);
6629           coding->src_pos = 0;
6630           coding->src_pos_byte = 0;
6631         }
6632       else
6633         {
6634           if (from < GPT && to >= GPT)
6635             move_gap_both (from, from_byte);
6636           coding->src_pos = from;
6637           coding->src_pos_byte = from_byte;
6638         }
6639     }
6640   else
6641     code_conversion_save (0, 0);
6642
6643   if (BUFFERP (dst_object))
6644     {
6645       coding->dst_object = dst_object;
6646       if (EQ (src_object, dst_object))
6647         {
6648           coding->dst_pos = from;
6649           coding->dst_pos_byte = from_byte;
6650         }
6651       else
6652         {
6653           coding->dst_pos = BUF_PT (XBUFFER (dst_object));
6654           coding->dst_pos_byte = BUF_PT_BYTE (XBUFFER (dst_object));
6655         }
6656       coding->dst_multibyte
6657         = ! NILP (XBUFFER (dst_object)->enable_multibyte_characters);
6658     }
6659   else if (EQ (dst_object, Qt))
6660     {
6661       coding->dst_object = Qnil;
6662       coding->dst_bytes = coding->src_chars;
6663       if (coding->dst_bytes == 0)
6664         coding->dst_bytes = 1;
6665       coding->destination = (unsigned char *) xmalloc (coding->dst_bytes);
6666       coding->dst_multibyte = 0;
6667     }
6668   else
6669     {
6670       coding->dst_object = Qnil;
6671       coding->dst_multibyte = 0;
6672     }
6673
6674   encode_coding (coding);
6675
6676   if (EQ (dst_object, Qt))
6677     {
6678       if (BUFFERP (coding->dst_object))
6679         coding->dst_object = Fbuffer_string ();
6680       else
6681         {
6682           coding->dst_object
6683             = make_unibyte_string ((char *) coding->destination,
6684                                    coding->produced);
6685           xfree (coding->destination);
6686         }
6687     }
6688
6689   if (saved_pt >= 0)
6690     {
6691       /* This is the case of:
6692          (BUFFERP (src_object) && EQ (src_object, dst_object))
6693          As we have moved PT while replacing the original buffer
6694          contents, we must recover it now.  */
6695       set_buffer_internal (XBUFFER (src_object));
6696       if (saved_pt < from)
6697         TEMP_SET_PT_BOTH (saved_pt, saved_pt_byte);
6698       else if (saved_pt < from + chars)
6699         TEMP_SET_PT_BOTH (from, from_byte);
6700       else if (! NILP (current_buffer->enable_multibyte_characters))
6701         TEMP_SET_PT_BOTH (saved_pt + (coding->produced_char - chars),
6702                           saved_pt_byte + (coding->produced - bytes));
6703       else
6704         TEMP_SET_PT_BOTH (saved_pt + (coding->produced - bytes),
6705                           saved_pt_byte + (coding->produced - bytes));
6706     }
6707
6708   unbind_to (count, Qnil);
6709 }
6710
6711
6712 Lisp_Object
6713 preferred_coding_system ()
6714 {
6715   int id = coding_categories[coding_priorities[0]].id;
6716
6717   return CODING_ID_NAME (id);
6718 }
6719
6720 \f
6721 #ifdef emacs
6722 /*** 8. Emacs Lisp library functions ***/
6723
6724 DEFUN ("coding-system-p", Fcoding_system_p, Scoding_system_p, 1, 1, 0,
6725        doc: /* Return t if OBJECT is nil or a coding-system.
6726 See the documentation of `define-coding-system' for information
6727 about coding-system objects.  */)
6728      (obj)
6729      Lisp_Object obj;
6730 {
6731   return ((NILP (obj) || CODING_SYSTEM_P (obj)) ? Qt : Qnil);
6732 }
6733
6734 DEFUN ("read-non-nil-coding-system", Fread_non_nil_coding_system,
6735        Sread_non_nil_coding_system, 1, 1, 0,
6736        doc: /* Read a coding system from the minibuffer, prompting with string PROMPT.  */)
6737      (prompt)
6738      Lisp_Object prompt;
6739 {
6740   Lisp_Object val;
6741   do
6742     {
6743       val = Fcompleting_read (prompt, Vcoding_system_alist, Qnil,
6744                               Qt, Qnil, Qcoding_system_history, Qnil, Qnil);
6745     }
6746   while (SCHARS (val) == 0);
6747   return (Fintern (val, Qnil));
6748 }
6749
6750 DEFUN ("read-coding-system", Fread_coding_system, Sread_coding_system, 1, 2, 0,
6751        doc: /* Read a coding system from the minibuffer, prompting with string PROMPT.
6752 If the user enters null input, return second argument DEFAULT-CODING-SYSTEM.  */)
6753      (prompt, default_coding_system)
6754      Lisp_Object prompt, default_coding_system;
6755 {
6756   Lisp_Object val;
6757   if (SYMBOLP (default_coding_system))
6758     XSETSTRING (default_coding_system, XPNTR (SYMBOL_NAME (default_coding_system)));
6759   val = Fcompleting_read (prompt, Vcoding_system_alist, Qnil,
6760                           Qt, Qnil, Qcoding_system_history,
6761                           default_coding_system, Qnil);
6762   return (SCHARS (val) == 0 ? Qnil : Fintern (val, Qnil));
6763 }
6764
6765 DEFUN ("check-coding-system", Fcheck_coding_system, Scheck_coding_system,
6766        1, 1, 0,
6767        doc: /* Check validity of CODING-SYSTEM.
6768 If valid, return CODING-SYSTEM, else signal a `coding-system-error' error.  */)
6769   (coding_system)
6770      Lisp_Object coding_system;
6771 {
6772   CHECK_SYMBOL (coding_system);
6773   if (!NILP (Fcoding_system_p (coding_system)))
6774     return coding_system;
6775   while (1)
6776     Fsignal (Qcoding_system_error, Fcons (coding_system, Qnil));
6777 }
6778
6779 \f
6780 /* Detect how the bytes at SRC of length SRC_BYTES are encoded.  If
6781    HIGHEST is nonzero, return the coding system of the highest
6782    priority among the detected coding systems.  Otherwize return a
6783    list of detected coding systems sorted by their priorities.  If
6784    MULTIBYTEP is nonzero, it is assumed that the bytes are in correct
6785    multibyte form but contains only ASCII and eight-bit chars.
6786    Otherwise, the bytes are raw bytes.
6787
6788    CODING-SYSTEM controls the detection as below:
6789
6790    If it is nil, detect both text-format and eol-format.  If the
6791    text-format part of CODING-SYSTEM is already specified
6792    (e.g. `iso-latin-1'), detect only eol-format.  If the eol-format
6793    part of CODING-SYSTEM is already specified (e.g. `undecided-unix'),
6794    detect only text-format.  */
6795
6796 Lisp_Object
6797 detect_coding_system (src, src_chars, src_bytes, highest, multibytep,
6798                       coding_system)
6799      const unsigned char *src;
6800      int src_chars, src_bytes, highest;
6801      int multibytep;
6802      Lisp_Object coding_system;
6803 {
6804   const unsigned char *src_end = src + src_bytes;
6805   Lisp_Object attrs, eol_type;
6806   Lisp_Object val;
6807   struct coding_system coding;
6808   int id;
6809   struct coding_detection_info detect_info;
6810   enum coding_category base_category;
6811
6812   if (NILP (coding_system))
6813     coding_system = Qundecided;
6814   setup_coding_system (coding_system, &coding);
6815   attrs = CODING_ID_ATTRS (coding.id);
6816   eol_type = CODING_ID_EOL_TYPE (coding.id);
6817   coding_system = CODING_ATTR_BASE_NAME (attrs);
6818
6819   coding.source = src;
6820   coding.src_chars = src_chars;
6821   coding.src_bytes = src_bytes;
6822   coding.src_multibyte = multibytep;
6823   coding.consumed = 0;
6824   coding.mode |= CODING_MODE_LAST_BLOCK;
6825
6826   detect_info.checked = detect_info.found = detect_info.rejected = 0;
6827
6828   /* At first, detect text-format if necessary.  */
6829   base_category = XINT (CODING_ATTR_CATEGORY (attrs));
6830   if (base_category == coding_category_undecided)
6831     {
6832       enum coding_category category;
6833       struct coding_system *this;
6834       int c, i;
6835
6836       /* Skip all ASCII bytes except for a few ISO2022 controls.  */
6837       for (i = 0; src < src_end; i++, src++)
6838         {
6839           c = *src;
6840           if (c & 0x80 || (c < 0x20 && (c == 0
6841                                         || c == ISO_CODE_ESC
6842                                         || c == ISO_CODE_SI
6843                                         || c == ISO_CODE_SO)))
6844             break;
6845         }
6846       coding.head_ascii = src - coding.source;
6847
6848       if (src < src_end)
6849         for (i = 0; i < coding_category_raw_text; i++)
6850           {
6851             category = coding_priorities[i];
6852             this = coding_categories + category;
6853
6854             if (this->id < 0)
6855               {
6856                 /* No coding system of this category is defined.  */
6857                 detect_info.rejected |= (1 << category);
6858               }
6859             else if (category >= coding_category_raw_text)
6860               continue;
6861             else if (detect_info.checked & (1 << category))
6862               {
6863                 if (highest
6864                     && (detect_info.found & (1 << category)))
6865                   break;
6866               }
6867             else
6868               {
6869                 if ((*(this->detector)) (&coding, &detect_info)
6870                     && highest
6871                     && (detect_info.found & (1 << category)))
6872                   {
6873                     if (category == coding_category_utf_16_auto)
6874                       {
6875                         if (detect_info.found & CATEGORY_MASK_UTF_16_LE)
6876                           category = coding_category_utf_16_le;
6877                         else
6878                           category = coding_category_utf_16_be;
6879                       }
6880                     break;
6881                   }
6882               }
6883           }
6884
6885       if (detect_info.rejected == CATEGORY_MASK_ANY)
6886         {
6887           detect_info.found = CATEGORY_MASK_RAW_TEXT;
6888           id = coding_categories[coding_category_raw_text].id;
6889           val = Fcons (make_number (id), Qnil);
6890         }
6891       else if (! detect_info.rejected && ! detect_info.found)
6892         {
6893           detect_info.found = CATEGORY_MASK_ANY;
6894           id = coding_categories[coding_category_undecided].id;
6895           val = Fcons (make_number (id), Qnil);
6896         }
6897       else if (highest)
6898         {
6899           if (detect_info.found)
6900             {
6901               detect_info.found = 1 << category;
6902               val = Fcons (make_number (this->id), Qnil);
6903             }
6904           else
6905             for (i = 0; i < coding_category_raw_text; i++)
6906               if (! (detect_info.rejected & (1 << coding_priorities[i])))
6907                 {
6908                   detect_info.found = 1 << coding_priorities[i];
6909                   id = coding_categories[coding_priorities[i]].id;
6910                   val = Fcons (make_number (id), Qnil);
6911                   break;
6912                 }
6913         }
6914       else
6915         {
6916           int mask = detect_info.rejected | detect_info.found;
6917           int found = 0;
6918           val = Qnil;
6919
6920           for (i = coding_category_raw_text - 1; i >= 0; i--)
6921             {
6922               category = coding_priorities[i];
6923               if (! (mask & (1 << category)))
6924                 {
6925                   found |= 1 << category;
6926                   id = coding_categories[category].id;
6927                   val = Fcons (make_number (id), val);
6928                 }
6929             }
6930           for (i = coding_category_raw_text - 1; i >= 0; i--)
6931             {
6932               category = coding_priorities[i];
6933               if (detect_info.found & (1 << category))
6934                 {
6935                   id = coding_categories[category].id;
6936                   val = Fcons (make_number (id), val);
6937                 }
6938             }
6939           detect_info.found |= found;
6940         }
6941     }
6942   else if (base_category == coding_category_utf_16_auto)
6943     {
6944       if (detect_coding_utf_16 (&coding, &detect_info))
6945         {
6946           enum coding_category category;
6947           struct coding_system *this;
6948
6949           if (detect_info.found & CATEGORY_MASK_UTF_16_LE)
6950             this = coding_categories + coding_category_utf_16_le;
6951           else if (detect_info.found & CATEGORY_MASK_UTF_16_BE)
6952             this = coding_categories + coding_category_utf_16_be;
6953           else if (detect_info.rejected & CATEGORY_MASK_UTF_16_LE_NOSIG)
6954             this = coding_categories + coding_category_utf_16_be_nosig;
6955           else
6956             this = coding_categories + coding_category_utf_16_le_nosig;
6957           val = Fcons (make_number (this->id), Qnil);
6958         }
6959     }
6960   else
6961     {
6962       detect_info.found = 1 << XINT (CODING_ATTR_CATEGORY (attrs));
6963       val = Fcons (make_number (coding.id), Qnil);
6964     }
6965
6966   /* Then, detect eol-format if necessary.  */
6967   {
6968     int normal_eol = -1, utf_16_be_eol = -1, utf_16_le_eol;
6969     Lisp_Object tail;
6970
6971     if (VECTORP (eol_type))
6972       {
6973         if (detect_info.found & ~CATEGORY_MASK_UTF_16)
6974           normal_eol = detect_eol (coding.source, src_bytes,
6975                                    coding_category_raw_text);
6976         if (detect_info.found & (CATEGORY_MASK_UTF_16_BE
6977                                  | CATEGORY_MASK_UTF_16_BE_NOSIG))
6978           utf_16_be_eol = detect_eol (coding.source, src_bytes,
6979                                       coding_category_utf_16_be);
6980         if (detect_info.found & (CATEGORY_MASK_UTF_16_LE
6981                                  | CATEGORY_MASK_UTF_16_LE_NOSIG))
6982           utf_16_le_eol = detect_eol (coding.source, src_bytes,
6983                                       coding_category_utf_16_le);
6984       }
6985     else
6986       {
6987         if (EQ (eol_type, Qunix))
6988           normal_eol = utf_16_be_eol = utf_16_le_eol = EOL_SEEN_LF;
6989         else if (EQ (eol_type, Qdos))
6990           normal_eol = utf_16_be_eol = utf_16_le_eol = EOL_SEEN_CRLF;
6991         else
6992           normal_eol = utf_16_be_eol = utf_16_le_eol = EOL_SEEN_CR;
6993       }
6994
6995     for (tail = val; CONSP (tail); tail = XCDR (tail))
6996       {
6997         enum coding_category category;
6998         int this_eol;
6999
7000         id = XINT (XCAR (tail));
7001         attrs = CODING_ID_ATTRS (id);
7002         category = XINT (CODING_ATTR_CATEGORY (attrs));
7003         eol_type = CODING_ID_EOL_TYPE (id);
7004         if (VECTORP (eol_type))
7005           {
7006             if (category == coding_category_utf_16_be
7007                 || category == coding_category_utf_16_be_nosig)
7008               this_eol = utf_16_be_eol;
7009             else if (category == coding_category_utf_16_le
7010                      || category == coding_category_utf_16_le_nosig)
7011               this_eol = utf_16_le_eol;
7012             else
7013               this_eol = normal_eol;
7014
7015             if (this_eol == EOL_SEEN_LF)
7016               XSETCAR (tail, AREF (eol_type, 0));
7017             else if (this_eol == EOL_SEEN_CRLF)
7018               XSETCAR (tail, AREF (eol_type, 1));
7019             else if (this_eol == EOL_SEEN_CR)
7020               XSETCAR (tail, AREF (eol_type, 2));
7021             else
7022               XSETCAR (tail, CODING_ID_NAME (id));
7023           }
7024         else
7025           XSETCAR (tail, CODING_ID_NAME (id));
7026       }
7027   }
7028
7029   return (highest ? XCAR (val) : val);
7030 }
7031
7032
7033 DEFUN ("detect-coding-region", Fdetect_coding_region, Sdetect_coding_region,
7034        2, 3, 0,
7035        doc: /* Detect coding system of the text in the region between START and END.
7036 Return a list of possible coding systems ordered by priority.
7037
7038 If only ASCII characters are found, it returns a list of single element
7039 `undecided' or its subsidiary coding system according to a detected
7040 end-of-line format.
7041
7042 If optional argument HIGHEST is non-nil, return the coding system of
7043 highest priority.  */)
7044      (start, end, highest)
7045      Lisp_Object start, end, highest;
7046 {
7047   int from, to;
7048   int from_byte, to_byte;
7049
7050   CHECK_NUMBER_COERCE_MARKER (start);
7051   CHECK_NUMBER_COERCE_MARKER (end);
7052
7053   validate_region (&start, &end);
7054   from = XINT (start), to = XINT (end);
7055   from_byte = CHAR_TO_BYTE (from);
7056   to_byte = CHAR_TO_BYTE (to);
7057
7058   if (from < GPT && to >= GPT)
7059     move_gap_both (to, to_byte);
7060
7061   return detect_coding_system (BYTE_POS_ADDR (from_byte),
7062                                to - from, to_byte - from_byte,
7063                                !NILP (highest),
7064                                !NILP (current_buffer
7065                                       ->enable_multibyte_characters),
7066                                Qnil);
7067 }
7068
7069 DEFUN ("detect-coding-string", Fdetect_coding_string, Sdetect_coding_string,
7070        1, 2, 0,
7071        doc: /* Detect coding system of the text in STRING.
7072 Return a list of possible coding systems ordered by priority.
7073
7074 If only ASCII characters are found, it returns a list of single element
7075 `undecided' or its subsidiary coding system according to a detected
7076 end-of-line format.
7077
7078 If optional argument HIGHEST is non-nil, return the coding system of
7079 highest priority.  */)
7080      (string, highest)
7081      Lisp_Object string, highest;
7082 {
7083   CHECK_STRING (string);
7084
7085   return detect_coding_system (SDATA (string),
7086                                SCHARS (string), SBYTES (string),
7087                                !NILP (highest), STRING_MULTIBYTE (string),
7088                                Qnil);
7089 }
7090
7091
7092 static INLINE int
7093 char_encodable_p (c, attrs)
7094      int c;
7095      Lisp_Object attrs;
7096 {
7097   Lisp_Object tail;
7098   struct charset *charset;
7099   Lisp_Object translation_table;
7100
7101   translation_table = CODING_ATTR_TRANS_TBL (attrs);
7102   if (CHAR_TABLE_P (translation_table))
7103     c = translate_char (translation_table, c);
7104   for (tail = CODING_ATTR_CHARSET_LIST (attrs);
7105        CONSP (tail); tail = XCDR (tail))
7106     {
7107       charset = CHARSET_FROM_ID (XINT (XCAR (tail)));
7108       if (CHAR_CHARSET_P (c, charset))
7109         break;
7110     }
7111   return (! NILP (tail));
7112 }
7113
7114
7115 /* Return a list of coding systems that safely encode the text between
7116    START and END.  If EXCLUDE is non-nil, it is a list of coding
7117    systems not to check.  The returned list doesn't contain any such
7118    coding systems.  In any case, if the text contains only ASCII or is
7119    unibyte, return t.  */
7120
7121 DEFUN ("find-coding-systems-region-internal",
7122        Ffind_coding_systems_region_internal,
7123        Sfind_coding_systems_region_internal, 2, 3, 0,
7124        doc: /* Internal use only.  */)
7125      (start, end, exclude)
7126      Lisp_Object start, end, exclude;
7127 {
7128   Lisp_Object coding_attrs_list, safe_codings;
7129   EMACS_INT start_byte, end_byte;
7130   const unsigned char *p, *pbeg, *pend;
7131   int c;
7132   Lisp_Object tail, elt;
7133
7134   if (STRINGP (start))
7135     {
7136       if (!STRING_MULTIBYTE (start)
7137           || SCHARS (start) == SBYTES (start))
7138         return Qt;
7139       start_byte = 0;
7140       end_byte = SBYTES (start);
7141     }
7142   else
7143     {
7144       CHECK_NUMBER_COERCE_MARKER (start);
7145       CHECK_NUMBER_COERCE_MARKER (end);
7146       if (XINT (start) < BEG || XINT (end) > Z || XINT (start) > XINT (end))
7147         args_out_of_range (start, end);
7148       if (NILP (current_buffer->enable_multibyte_characters))
7149         return Qt;
7150       start_byte = CHAR_TO_BYTE (XINT (start));
7151       end_byte = CHAR_TO_BYTE (XINT (end));
7152       if (XINT (end) - XINT (start) == end_byte - start_byte)
7153         return Qt;
7154
7155       if (XINT (start) < GPT && XINT (end) > GPT)
7156         {
7157           if ((GPT - XINT (start)) < (XINT (end) - GPT))
7158             move_gap_both (XINT (start), start_byte);
7159           else
7160             move_gap_both (XINT (end), end_byte);
7161         }
7162     }
7163
7164   coding_attrs_list = Qnil;
7165   for (tail = Vcoding_system_list; CONSP (tail); tail = XCDR (tail))
7166     if (NILP (exclude)
7167         || NILP (Fmemq (XCAR (tail), exclude)))
7168       {
7169         Lisp_Object attrs;
7170
7171         attrs = AREF (CODING_SYSTEM_SPEC (XCAR (tail)), 0);
7172         if (EQ (XCAR (tail), CODING_ATTR_BASE_NAME (attrs))
7173             && ! EQ (CODING_ATTR_TYPE (attrs), Qundecided))
7174           {
7175             ASET (attrs, coding_attr_trans_tbl,
7176                   get_translation_table (attrs, 1));
7177             coding_attrs_list = Fcons (attrs, coding_attrs_list);
7178           }
7179       }
7180
7181   if (STRINGP (start))
7182     p = pbeg = SDATA (start);
7183   else
7184     p = pbeg = BYTE_POS_ADDR (start_byte);
7185   pend = p + (end_byte - start_byte);
7186
7187   while (p < pend && ASCII_BYTE_P (*p)) p++;
7188   while (p < pend && ASCII_BYTE_P (*(pend - 1))) pend--;
7189
7190   while (p < pend)
7191     {
7192       if (ASCII_BYTE_P (*p))
7193         p++;
7194       else
7195         {
7196           c = STRING_CHAR_ADVANCE (p);
7197
7198           charset_map_loaded = 0;
7199           for (tail = coding_attrs_list; CONSP (tail);)
7200             {
7201               elt = XCAR (tail);
7202               if (NILP (elt))
7203                 tail = XCDR (tail);
7204               else if (char_encodable_p (c, elt))
7205                 tail = XCDR (tail);
7206               else if (CONSP (XCDR (tail)))
7207                 {
7208                   XSETCAR (tail, XCAR (XCDR (tail)));
7209                   XSETCDR (tail, XCDR (XCDR (tail)));
7210                 }
7211               else
7212                 {
7213                   XSETCAR (tail, Qnil);
7214                   tail = XCDR (tail);
7215                 }
7216             }
7217           if (charset_map_loaded)
7218             {
7219               EMACS_INT p_offset = p - pbeg, pend_offset = pend - pbeg;
7220
7221               if (STRINGP (start))
7222                 pbeg = SDATA (start);
7223               else
7224                 pbeg = BYTE_POS_ADDR (start_byte);
7225               p = pbeg + p_offset;
7226               pend = pbeg + pend_offset;
7227             }
7228         }
7229     }
7230
7231   safe_codings = Qnil;
7232   for (tail = coding_attrs_list; CONSP (tail); tail = XCDR (tail))
7233     if (! NILP (XCAR (tail)))
7234       safe_codings = Fcons (CODING_ATTR_BASE_NAME (XCAR (tail)), safe_codings);
7235
7236   return safe_codings;
7237 }
7238
7239
7240 DEFUN ("unencodable-char-position", Funencodable_char_position,
7241        Sunencodable_char_position, 3, 5, 0,
7242        doc: /*
7243 Return position of first un-encodable character in a region.
7244 START and END specfiy the region and CODING-SYSTEM specifies the
7245 encoding to check.  Return nil if CODING-SYSTEM does encode the region.
7246
7247 If optional 4th argument COUNT is non-nil, it specifies at most how
7248 many un-encodable characters to search.  In this case, the value is a
7249 list of positions.
7250
7251 If optional 5th argument STRING is non-nil, it is a string to search
7252 for un-encodable characters.  In that case, START and END are indexes
7253 to the string.  */)
7254      (start, end, coding_system, count, string)
7255      Lisp_Object start, end, coding_system, count, string;
7256 {
7257   int n;
7258   struct coding_system coding;
7259   Lisp_Object attrs, charset_list, translation_table;
7260   Lisp_Object positions;
7261   int from, to;
7262   const unsigned char *p, *stop, *pend;
7263   int ascii_compatible;
7264
7265   setup_coding_system (Fcheck_coding_system (coding_system), &coding);
7266   attrs = CODING_ID_ATTRS (coding.id);
7267   if (EQ (CODING_ATTR_TYPE (attrs), Qraw_text))
7268     return Qnil;
7269   ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
7270   charset_list = CODING_ATTR_CHARSET_LIST (attrs);
7271   translation_table = get_translation_table (attrs, 1);
7272
7273   if (NILP (string))
7274     {
7275       validate_region (&start, &end);
7276       from = XINT (start);
7277       to = XINT (end);
7278       if (NILP (current_buffer->enable_multibyte_characters)
7279           || (ascii_compatible
7280               && (to - from) == (CHAR_TO_BYTE (to) - (CHAR_TO_BYTE (from)))))
7281         return Qnil;
7282       p = CHAR_POS_ADDR (from);
7283       pend = CHAR_POS_ADDR (to);
7284       if (from < GPT && to >= GPT)
7285         stop = GPT_ADDR;
7286       else
7287         stop = pend;
7288     }
7289   else
7290     {
7291       CHECK_STRING (string);
7292       CHECK_NATNUM (start);
7293       CHECK_NATNUM (end);
7294       from = XINT (start);
7295       to = XINT (end);
7296       if (from > to
7297           || to > SCHARS (string))
7298         args_out_of_range_3 (string, start, end);
7299       if (! STRING_MULTIBYTE (string))
7300         return Qnil;
7301       p = SDATA (string) + string_char_to_byte (string, from);
7302       stop = pend = SDATA (string) + string_char_to_byte (string, to);
7303       if (ascii_compatible && (to - from) == (pend - p))
7304         return Qnil;
7305     }
7306
7307   if (NILP (count))
7308     n = 1;
7309   else
7310     {
7311       CHECK_NATNUM (count);
7312       n = XINT (count);
7313     }
7314
7315   positions = Qnil;
7316   while (1)
7317     {
7318       int c;
7319
7320       if (ascii_compatible)
7321         while (p < stop && ASCII_BYTE_P (*p))
7322           p++, from++;
7323       if (p >= stop)
7324         {
7325           if (p >= pend)
7326             break;
7327           stop = pend;
7328           p = GAP_END_ADDR;
7329         }
7330
7331       c = STRING_CHAR_ADVANCE (p);
7332       if (! (ASCII_CHAR_P (c) && ascii_compatible)
7333           && ! char_charset (translate_char (translation_table, c),
7334                              charset_list, NULL))
7335         {
7336           positions = Fcons (make_number (from), positions);
7337           n--;
7338           if (n == 0)
7339             break;
7340         }
7341
7342       from++;
7343     }
7344
7345   return (NILP (count) ? Fcar (positions) : Fnreverse (positions));
7346 }
7347
7348
7349 DEFUN ("check-coding-systems-region", Fcheck_coding_systems_region,
7350        Scheck_coding_systems_region, 3, 3, 0,
7351        doc: /* Check if the region is encodable by coding systems.
7352
7353 START and END are buffer positions specifying the region.
7354 CODING-SYSTEM-LIST is a list of coding systems to check.
7355
7356 The value is an alist ((CODING-SYSTEM POS0 POS1 ...) ...), where
7357 CODING-SYSTEM is a member of CODING-SYSTEM-LIst and can't encode the
7358 whole region, POS0, POS1, ... are buffer positions where non-encodable
7359 characters are found.
7360
7361 If all coding systems in CODING-SYSTEM-LIST can encode the region, the
7362 value is nil.
7363
7364 START may be a string.  In that case, check if the string is
7365 encodable, and the value contains indices to the string instead of
7366 buffer positions.  END is ignored.  */)
7367      (start, end, coding_system_list)
7368      Lisp_Object start, end, coding_system_list;
7369 {
7370   Lisp_Object list;
7371   EMACS_INT start_byte, end_byte;
7372   int pos;
7373   const unsigned char *p, *pbeg, *pend;
7374   int c;
7375   Lisp_Object tail, elt, attrs;
7376
7377   if (STRINGP (start))
7378     {
7379       if (!STRING_MULTIBYTE (start)
7380           && SCHARS (start) != SBYTES (start))
7381         return Qnil;
7382       start_byte = 0;
7383       end_byte = SBYTES (start);
7384       pos = 0;
7385     }
7386   else
7387     {
7388       CHECK_NUMBER_COERCE_MARKER (start);
7389       CHECK_NUMBER_COERCE_MARKER (end);
7390       if (XINT (start) < BEG || XINT (end) > Z || XINT (start) > XINT (end))
7391         args_out_of_range (start, end);
7392       if (NILP (current_buffer->enable_multibyte_characters))
7393         return Qnil;
7394       start_byte = CHAR_TO_BYTE (XINT (start));
7395       end_byte = CHAR_TO_BYTE (XINT (end));
7396       if (XINT (end) - XINT (start) == end_byte - start_byte)
7397         return Qt;
7398
7399       if (XINT (start) < GPT && XINT (end) > GPT)
7400         {
7401           if ((GPT - XINT (start)) < (XINT (end) - GPT))
7402             move_gap_both (XINT (start), start_byte);
7403           else
7404             move_gap_both (XINT (end), end_byte);
7405         }
7406       pos = XINT (start);
7407     }
7408
7409   list = Qnil;
7410   for (tail = coding_system_list; CONSP (tail); tail = XCDR (tail))
7411     {
7412       elt = XCAR (tail);
7413       attrs = AREF (CODING_SYSTEM_SPEC (elt), 0);
7414       ASET (attrs, coding_attr_trans_tbl, get_translation_table (attrs, 1));
7415       list = Fcons (Fcons (elt, Fcons (attrs, Qnil)), list);
7416     }
7417
7418   if (STRINGP (start))
7419     p = pbeg = SDATA (start);
7420   else
7421     p = pbeg = BYTE_POS_ADDR (start_byte);
7422   pend = p + (end_byte - start_byte);
7423
7424   while (p < pend && ASCII_BYTE_P (*p)) p++, pos++;
7425   while (p < pend && ASCII_BYTE_P (*(pend - 1))) pend--;
7426
7427   while (p < pend)
7428     {
7429       if (ASCII_BYTE_P (*p))
7430         p++;
7431       else
7432         {
7433           c = STRING_CHAR_ADVANCE (p);
7434
7435           charset_map_loaded = 0;
7436           for (tail = list; CONSP (tail); tail = XCDR (tail))
7437             {
7438               elt = XCDR (XCAR (tail));
7439               if (! char_encodable_p (c, XCAR (elt)))
7440                 XSETCDR (elt, Fcons (make_number (pos), XCDR (elt)));
7441             }
7442           if (charset_map_loaded)
7443             {
7444               EMACS_INT p_offset = p - pbeg, pend_offset = pend - pbeg;
7445
7446               if (STRINGP (start))
7447                 pbeg = SDATA (start);
7448               else
7449                 pbeg = BYTE_POS_ADDR (start_byte);
7450               p = pbeg + p_offset;
7451               pend = pbeg + pend_offset;
7452             }
7453         }
7454       pos++;
7455     }
7456
7457   tail = list;
7458   list = Qnil;
7459   for (; CONSP (tail); tail = XCDR (tail))
7460     {
7461       elt = XCAR (tail);
7462       if (CONSP (XCDR (XCDR (elt))))
7463         list = Fcons (Fcons (XCAR (elt), Fnreverse (XCDR (XCDR (elt)))),
7464                       list);
7465     }
7466
7467   return list;
7468 }
7469
7470
7471 Lisp_Object
7472 code_convert_region (start, end, coding_system, dst_object, encodep, norecord)
7473      Lisp_Object start, end, coding_system, dst_object;
7474      int encodep, norecord;
7475 {
7476   struct coding_system coding;
7477   EMACS_INT from, from_byte, to, to_byte;
7478   Lisp_Object src_object;
7479
7480   CHECK_NUMBER_COERCE_MARKER (start);
7481   CHECK_NUMBER_COERCE_MARKER (end);
7482   if (NILP (coding_system))
7483     coding_system = Qno_conversion;
7484   else
7485     CHECK_CODING_SYSTEM (coding_system);
7486   src_object = Fcurrent_buffer ();
7487   if (NILP (dst_object))
7488     dst_object = src_object;
7489   else if (! EQ (dst_object, Qt))
7490     CHECK_BUFFER (dst_object);
7491
7492   validate_region (&start, &end);
7493   from = XFASTINT (start);
7494   from_byte = CHAR_TO_BYTE (from);
7495   to = XFASTINT (end);
7496   to_byte = CHAR_TO_BYTE (to);
7497
7498   setup_coding_system (coding_system, &coding);
7499   coding.mode |= CODING_MODE_LAST_BLOCK;
7500
7501   if (encodep)
7502     encode_coding_object (&coding, src_object, from, from_byte, to, to_byte,
7503                           dst_object);
7504   else
7505     decode_coding_object (&coding, src_object, from, from_byte, to, to_byte,
7506                           dst_object);
7507   if (! norecord)
7508     Vlast_coding_system_used = CODING_ID_NAME (coding.id);
7509
7510   return (BUFFERP (dst_object)
7511           ? make_number (coding.produced_char)
7512           : coding.dst_object);
7513 }
7514
7515
7516 DEFUN ("decode-coding-region", Fdecode_coding_region, Sdecode_coding_region,
7517        3, 4, "r\nzCoding system: ",
7518        doc: /* Decode the current region from the specified coding system.
7519 When called from a program, takes four arguments:
7520         START, END, CODING-SYSTEM, and DESTINATION.
7521 START and END are buffer positions.
7522
7523 Optional 4th arguments DESTINATION specifies where the decoded text goes.
7524 If nil, the region between START and END is replace by the decoded text.
7525 If buffer, the decoded text is inserted in the buffer.
7526 If t, the decoded text is returned.
7527
7528 This function sets `last-coding-system-used' to the precise coding system
7529 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
7530 not fully specified.)
7531 It returns the length of the decoded text.  */)
7532      (start, end, coding_system, destination)
7533      Lisp_Object start, end, coding_system, destination;
7534 {
7535   return code_convert_region (start, end, coding_system, destination, 0, 0);
7536 }
7537
7538 DEFUN ("encode-coding-region", Fencode_coding_region, Sencode_coding_region,
7539        3, 4, "r\nzCoding system: ",
7540        doc: /* Encode the current region by specified coding system.
7541 When called from a program, takes three arguments:
7542 START, END, and CODING-SYSTEM.  START and END are buffer positions.
7543
7544 Optional 4th arguments DESTINATION specifies where the encoded text goes.
7545 If nil, the region between START and END is replace by the encoded text.
7546 If buffer, the encoded text is inserted in the buffer.
7547 If t, the encoded text is returned.
7548
7549 This function sets `last-coding-system-used' to the precise coding system
7550 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
7551 not fully specified.)
7552 It returns the length of the encoded text.  */)
7553   (start, end, coding_system, destination)
7554      Lisp_Object start, end, coding_system, destination;
7555 {
7556   return code_convert_region (start, end, coding_system, destination, 1, 0);
7557 }
7558
7559 Lisp_Object
7560 code_convert_string (string, coding_system, dst_object,
7561                      encodep, nocopy, norecord)
7562      Lisp_Object string, coding_system, dst_object;
7563      int encodep, nocopy, norecord;
7564 {
7565   struct coding_system coding;
7566   EMACS_INT chars, bytes;
7567
7568   CHECK_STRING (string);
7569   if (NILP (coding_system))
7570     {
7571       if (! norecord)
7572         Vlast_coding_system_used = Qno_conversion;
7573       if (NILP (dst_object))
7574         return (nocopy ? Fcopy_sequence (string) : string);
7575     }
7576
7577   if (NILP (coding_system))
7578     coding_system = Qno_conversion;
7579   else
7580     CHECK_CODING_SYSTEM (coding_system);
7581   if (NILP (dst_object))
7582     dst_object = Qt;
7583   else if (! EQ (dst_object, Qt))
7584     CHECK_BUFFER (dst_object);
7585
7586   setup_coding_system (coding_system, &coding);
7587   coding.mode |= CODING_MODE_LAST_BLOCK;
7588   chars = SCHARS (string);
7589   bytes = SBYTES (string);
7590   if (encodep)
7591     encode_coding_object (&coding, string, 0, 0, chars, bytes, dst_object);
7592   else
7593     decode_coding_object (&coding, string, 0, 0, chars, bytes, dst_object);
7594   if (! norecord)
7595     Vlast_coding_system_used = CODING_ID_NAME (coding.id);
7596
7597   return (BUFFERP (dst_object)
7598           ? make_number (coding.produced_char)
7599           : coding.dst_object);
7600 }
7601
7602
7603 /* Encode or decode STRING according to CODING_SYSTEM.
7604    Do not set Vlast_coding_system_used.
7605
7606    This function is called only from macros DECODE_FILE and
7607    ENCODE_FILE, thus we ignore character composition.  */
7608
7609 Lisp_Object
7610 code_convert_string_norecord (string, coding_system, encodep)
7611      Lisp_Object string, coding_system;
7612      int encodep;
7613 {
7614   return code_convert_string (string, coding_system, Qt, encodep, 0, 1);
7615 }
7616
7617
7618 DEFUN ("decode-coding-string", Fdecode_coding_string, Sdecode_coding_string,
7619        2, 4, 0,
7620        doc: /* Decode STRING which is encoded in CODING-SYSTEM, and return the result.
7621
7622 Optional third arg NOCOPY non-nil means it is OK to return STRING itself
7623 if the decoding operation is trivial.
7624
7625 Optional fourth arg BUFFER non-nil meant that the decoded text is
7626 inserted in BUFFER instead of returned as a string.  In this case,
7627 the return value is BUFFER.
7628
7629 This function sets `last-coding-system-used' to the precise coding system
7630 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
7631 not fully specified.  */)
7632   (string, coding_system, nocopy, buffer)
7633      Lisp_Object string, coding_system, nocopy, buffer;
7634 {
7635   return code_convert_string (string, coding_system, buffer,
7636                               0, ! NILP (nocopy), 0);
7637 }
7638
7639 DEFUN ("encode-coding-string", Fencode_coding_string, Sencode_coding_string,
7640        2, 4, 0,
7641        doc: /* Encode STRING to CODING-SYSTEM, and return the result.
7642
7643 Optional third arg NOCOPY non-nil means it is OK to return STRING
7644 itself if the encoding operation is trivial.
7645
7646 Optional fourth arg BUFFER non-nil meant that the encoded text is
7647 inserted in BUFFER instead of returned as a string.  In this case,
7648 the return value is BUFFER.
7649
7650 This function sets `last-coding-system-used' to the precise coding system
7651 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
7652 not fully specified.)  */)
7653      (string, coding_system, nocopy, buffer)
7654      Lisp_Object string, coding_system, nocopy, buffer;
7655 {
7656   return code_convert_string (string, coding_system, buffer,
7657                               1, ! NILP (nocopy), 1);
7658 }
7659
7660 \f
7661 DEFUN ("decode-sjis-char", Fdecode_sjis_char, Sdecode_sjis_char, 1, 1, 0,
7662        doc: /* Decode a Japanese character which has CODE in shift_jis encoding.
7663 Return the corresponding character.  */)
7664      (code)
7665      Lisp_Object code;
7666 {
7667   Lisp_Object spec, attrs, val;
7668   struct charset *charset_roman, *charset_kanji, *charset_kana, *charset;
7669   int c;
7670
7671   CHECK_NATNUM (code);
7672   c = XFASTINT (code);
7673   CHECK_CODING_SYSTEM_GET_SPEC (Vsjis_coding_system, spec);
7674   attrs = AREF (spec, 0);
7675
7676   if (ASCII_BYTE_P (c)
7677       && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
7678     return code;
7679
7680   val = CODING_ATTR_CHARSET_LIST (attrs);
7681   charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
7682   charset_kana = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
7683   charset_kanji = CHARSET_FROM_ID (XINT (XCAR (val)));
7684
7685   if (c <= 0x7F)
7686     charset = charset_roman;
7687   else if (c >= 0xA0 && c < 0xDF)
7688     {
7689       charset = charset_kana;
7690       c -= 0x80;
7691     }
7692   else
7693     {
7694       int s1 = c >> 8, s2 = c & 0xFF;
7695
7696       if (s1 < 0x81 || (s1 > 0x9F && s1 < 0xE0) || s1 > 0xEF
7697           || s2 < 0x40 || s2 == 0x7F || s2 > 0xFC)
7698         error ("Invalid code: %d", code);
7699       SJIS_TO_JIS (c);
7700       charset = charset_kanji;
7701     }
7702   c = DECODE_CHAR (charset, c);
7703   if (c < 0)
7704     error ("Invalid code: %d", code);
7705   return make_number (c);
7706 }
7707
7708
7709 DEFUN ("encode-sjis-char", Fencode_sjis_char, Sencode_sjis_char, 1, 1, 0,
7710        doc: /* Encode a Japanese character CHAR to shift_jis encoding.
7711 Return the corresponding code in SJIS.  */)
7712      (ch)
7713     Lisp_Object ch;
7714 {
7715   Lisp_Object spec, attrs, charset_list;
7716   int c;
7717   struct charset *charset;
7718   unsigned code;
7719
7720   CHECK_CHARACTER (ch);
7721   c = XFASTINT (ch);
7722   CHECK_CODING_SYSTEM_GET_SPEC (Vsjis_coding_system, spec);
7723   attrs = AREF (spec, 0);
7724
7725   if (ASCII_CHAR_P (c)
7726       && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
7727     return ch;
7728
7729   charset_list = CODING_ATTR_CHARSET_LIST (attrs);
7730   charset = char_charset (c, charset_list, &code);
7731   if (code == CHARSET_INVALID_CODE (charset))
7732     error ("Can't encode by shift_jis encoding: %d", c);
7733   JIS_TO_SJIS (code);
7734
7735   return make_number (code);
7736 }
7737
7738 DEFUN ("decode-big5-char", Fdecode_big5_char, Sdecode_big5_char, 1, 1, 0,
7739        doc: /* Decode a Big5 character which has CODE in BIG5 coding system.
7740 Return the corresponding character.  */)
7741      (code)
7742      Lisp_Object code;
7743 {
7744   Lisp_Object spec, attrs, val;
7745   struct charset *charset_roman, *charset_big5, *charset;
7746   int c;
7747
7748   CHECK_NATNUM (code);
7749   c = XFASTINT (code);
7750   CHECK_CODING_SYSTEM_GET_SPEC (Vbig5_coding_system, spec);
7751   attrs = AREF (spec, 0);
7752
7753   if (ASCII_BYTE_P (c)
7754       && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
7755     return code;
7756
7757   val = CODING_ATTR_CHARSET_LIST (attrs);
7758   charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
7759   charset_big5 = CHARSET_FROM_ID (XINT (XCAR (val)));
7760
7761   if (c <= 0x7F)
7762     charset = charset_roman;
7763   else
7764     {
7765       int b1 = c >> 8, b2 = c & 0x7F;
7766       if (b1 < 0xA1 || b1 > 0xFE
7767           || b2 < 0x40 || (b2 > 0x7E && b2 < 0xA1) || b2 > 0xFE)
7768         error ("Invalid code: %d", code);
7769       charset = charset_big5;
7770     }
7771   c = DECODE_CHAR (charset, (unsigned )c);
7772   if (c < 0)
7773     error ("Invalid code: %d", code);
7774   return make_number (c);
7775 }
7776
7777 DEFUN ("encode-big5-char", Fencode_big5_char, Sencode_big5_char, 1, 1, 0,
7778        doc: /* Encode the Big5 character CHAR to BIG5 coding system.
7779 Return the corresponding character code in Big5.  */)
7780      (ch)
7781      Lisp_Object ch;
7782 {
7783   Lisp_Object spec, attrs, charset_list;
7784   struct charset *charset;
7785   int c;
7786   unsigned code;
7787
7788   CHECK_CHARACTER (ch);
7789   c = XFASTINT (ch);
7790   CHECK_CODING_SYSTEM_GET_SPEC (Vbig5_coding_system, spec);
7791   attrs = AREF (spec, 0);
7792   if (ASCII_CHAR_P (c)
7793       && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
7794     return ch;
7795
7796   charset_list = CODING_ATTR_CHARSET_LIST (attrs);
7797   charset = char_charset (c, charset_list, &code);
7798   if (code == CHARSET_INVALID_CODE (charset))
7799     error ("Can't encode by Big5 encoding: %d", c);
7800
7801   return make_number (code);
7802 }
7803
7804 \f
7805 DEFUN ("set-terminal-coding-system-internal",
7806        Fset_terminal_coding_system_internal,
7807        Sset_terminal_coding_system_internal, 1, 1, 0,
7808        doc: /* Internal use only.  */)
7809      (coding_system)
7810      Lisp_Object coding_system;
7811 {
7812   CHECK_SYMBOL (coding_system);
7813   setup_coding_system (Fcheck_coding_system (coding_system),
7814                         &terminal_coding);
7815
7816   /* We had better not send unsafe characters to terminal.  */
7817   terminal_coding.mode |= CODING_MODE_SAFE_ENCODING;
7818   /* Characer composition should be disabled.  */
7819   terminal_coding.common_flags &= ~CODING_ANNOTATE_COMPOSITION_MASK;
7820   terminal_coding.src_multibyte = 1;
7821   terminal_coding.dst_multibyte = 0;
7822   return Qnil;
7823 }
7824
7825 DEFUN ("set-safe-terminal-coding-system-internal",
7826        Fset_safe_terminal_coding_system_internal,
7827        Sset_safe_terminal_coding_system_internal, 1, 1, 0,
7828        doc: /* Internal use only.  */)
7829      (coding_system)
7830      Lisp_Object coding_system;
7831 {
7832   CHECK_SYMBOL (coding_system);
7833   setup_coding_system (Fcheck_coding_system (coding_system),
7834                        &safe_terminal_coding);
7835   /* Characer composition should be disabled.  */
7836   safe_terminal_coding.common_flags &= ~CODING_ANNOTATE_COMPOSITION_MASK;
7837   safe_terminal_coding.src_multibyte = 1;
7838   safe_terminal_coding.dst_multibyte = 0;
7839   return Qnil;
7840 }
7841
7842 DEFUN ("terminal-coding-system",
7843        Fterminal_coding_system, Sterminal_coding_system, 0, 0, 0,
7844        doc: /* Return coding system specified for terminal output.  */)
7845      ()
7846 {
7847   return CODING_ID_NAME (terminal_coding.id);
7848 }
7849
7850 DEFUN ("set-keyboard-coding-system-internal",
7851        Fset_keyboard_coding_system_internal,
7852        Sset_keyboard_coding_system_internal, 1, 1, 0,
7853        doc: /* Internal use only.  */)
7854      (coding_system)
7855      Lisp_Object coding_system;
7856 {
7857   CHECK_SYMBOL (coding_system);
7858   setup_coding_system (Fcheck_coding_system (coding_system),
7859                        &keyboard_coding);
7860   /* Characer composition should be disabled.  */
7861   keyboard_coding.common_flags &= ~CODING_ANNOTATE_COMPOSITION_MASK;
7862   return Qnil;
7863 }
7864
7865 DEFUN ("keyboard-coding-system",
7866        Fkeyboard_coding_system, Skeyboard_coding_system, 0, 0, 0,
7867        doc: /* Return coding system specified for decoding keyboard input.  */)
7868      ()
7869 {
7870   return CODING_ID_NAME (keyboard_coding.id);
7871 }
7872
7873 \f
7874 DEFUN ("find-operation-coding-system", Ffind_operation_coding_system,
7875        Sfind_operation_coding_system,  1, MANY, 0,
7876        doc: /* Choose a coding system for an operation based on the target name.
7877 The value names a pair of coding systems: (DECODING-SYSTEM . ENCODING-SYSTEM).
7878 DECODING-SYSTEM is the coding system to use for decoding
7879 \(in case OPERATION does decoding), and ENCODING-SYSTEM is the coding system
7880 for encoding (in case OPERATION does encoding).
7881
7882 The first argument OPERATION specifies an I/O primitive:
7883   For file I/O, `insert-file-contents' or `write-region'.
7884   For process I/O, `call-process', `call-process-region', or `start-process'.
7885   For network I/O, `open-network-stream'.
7886
7887 The remaining arguments should be the same arguments that were passed
7888 to the primitive.  Depending on which primitive, one of those arguments
7889 is selected as the TARGET.  For example, if OPERATION does file I/O,
7890 whichever argument specifies the file name is TARGET.
7891
7892 TARGET has a meaning which depends on OPERATION:
7893   For file I/O, TARGET is a file name.
7894   For process I/O, TARGET is a process name.
7895   For network I/O, TARGET is a service name or a port number
7896
7897 This function looks up what specified for TARGET in,
7898 `file-coding-system-alist', `process-coding-system-alist',
7899 or `network-coding-system-alist' depending on OPERATION.
7900 They may specify a coding system, a cons of coding systems,
7901 or a function symbol to call.
7902 In the last case, we call the function with one argument,
7903 which is a list of all the arguments given to this function.
7904
7905 usage: (find-operation-coding-system OPERATION ARGUMENTS ...)  */)
7906      (nargs, args)
7907      int nargs;
7908      Lisp_Object *args;
7909 {
7910   Lisp_Object operation, target_idx, target, val;
7911   register Lisp_Object chain;
7912
7913   if (nargs < 2)
7914     error ("Too few arguments");
7915   operation = args[0];
7916   if (!SYMBOLP (operation)
7917       || !INTEGERP (target_idx = Fget (operation, Qtarget_idx)))
7918     error ("Invalid first arguement");
7919   if (nargs < 1 + XINT (target_idx))
7920     error ("Too few arguments for operation: %s",
7921            SDATA (SYMBOL_NAME (operation)));
7922   target = args[XINT (target_idx) + 1];
7923   if (!(STRINGP (target)
7924         || (EQ (operation, Qopen_network_stream) && INTEGERP (target))))
7925     error ("Invalid %dth argument", XINT (target_idx) + 1);
7926
7927   chain = ((EQ (operation, Qinsert_file_contents)
7928             || EQ (operation, Qwrite_region))
7929            ? Vfile_coding_system_alist
7930            : (EQ (operation, Qopen_network_stream)
7931               ? Vnetwork_coding_system_alist
7932               : Vprocess_coding_system_alist));
7933   if (NILP (chain))
7934     return Qnil;
7935
7936   for (; CONSP (chain); chain = XCDR (chain))
7937     {
7938       Lisp_Object elt;
7939
7940       elt = XCAR (chain);
7941       if (CONSP (elt)
7942           && ((STRINGP (target)
7943                && STRINGP (XCAR (elt))
7944                && fast_string_match (XCAR (elt), target) >= 0)
7945               || (INTEGERP (target) && EQ (target, XCAR (elt)))))
7946         {
7947           val = XCDR (elt);
7948           /* Here, if VAL is both a valid coding system and a valid
7949              function symbol, we return VAL as a coding system.  */
7950           if (CONSP (val))
7951             return val;
7952           if (! SYMBOLP (val))
7953             return Qnil;
7954           if (! NILP (Fcoding_system_p (val)))
7955             return Fcons (val, val);
7956           if (! NILP (Ffboundp (val)))
7957             {
7958               val = call1 (val, Flist (nargs, args));
7959               if (CONSP (val))
7960                 return val;
7961               if (SYMBOLP (val) && ! NILP (Fcoding_system_p (val)))
7962                 return Fcons (val, val);
7963             }
7964           return Qnil;
7965         }
7966     }
7967   return Qnil;
7968 }
7969
7970 DEFUN ("set-coding-system-priority", Fset_coding_system_priority,
7971        Sset_coding_system_priority, 0, MANY, 0,
7972        doc: /* Assign higher priority to the coding systems given as arguments.
7973 If multiple coding systems belongs to the same category,
7974 all but the first one are ignored.
7975
7976 usage: (set-coding-system-priority ...)  */)
7977      (nargs, args)
7978      int nargs;
7979      Lisp_Object *args;
7980 {
7981   int i, j;
7982   int changed[coding_category_max];
7983   enum coding_category priorities[coding_category_max];
7984
7985   bzero (changed, sizeof changed);
7986
7987   for (i = j = 0; i < nargs; i++)
7988     {
7989       enum coding_category category;
7990       Lisp_Object spec, attrs;
7991
7992       CHECK_CODING_SYSTEM_GET_SPEC (args[i], spec);
7993       attrs = AREF (spec, 0);
7994       category = XINT (CODING_ATTR_CATEGORY (attrs));
7995       if (changed[category])
7996         /* Ignore this coding system because a coding system of the
7997            same category already had a higher priority.  */
7998         continue;
7999       changed[category] = 1;
8000       priorities[j++] = category;
8001       if (coding_categories[category].id >= 0
8002           && ! EQ (args[i], CODING_ID_NAME (coding_categories[category].id)))
8003         setup_coding_system (args[i], &coding_categories[category]);
8004       Fset (AREF (Vcoding_category_table, category), args[i]);
8005     }
8006
8007   /* Now we have decided top J priorities.  Reflect the order of the
8008      original priorities to the remaining priorities.  */
8009
8010   for (i = j, j = 0; i < coding_category_max; i++, j++)
8011     {
8012       while (j < coding_category_max
8013              && changed[coding_priorities[j]])
8014         j++;
8015       if (j == coding_category_max)
8016         abort ();
8017       priorities[i] = coding_priorities[j];
8018     }
8019
8020   bcopy (priorities, coding_priorities, sizeof priorities);
8021
8022   /* Update `coding-category-list'.  */
8023   Vcoding_category_list = Qnil;
8024   for (i = coding_category_max - 1; i >= 0; i--)
8025     Vcoding_category_list
8026       = Fcons (AREF (Vcoding_category_table, priorities[i]),
8027                Vcoding_category_list);
8028
8029   return Qnil;
8030 }
8031
8032 DEFUN ("coding-system-priority-list", Fcoding_system_priority_list,
8033        Scoding_system_priority_list, 0, 1, 0,
8034        doc: /* Return a list of coding systems ordered by their priorities.
8035 HIGHESTP non-nil means just return the highest priority one.  */)
8036      (highestp)
8037      Lisp_Object highestp;
8038 {
8039   int i;
8040   Lisp_Object val;
8041
8042   for (i = 0, val = Qnil; i < coding_category_max; i++)
8043     {
8044       enum coding_category category = coding_priorities[i];
8045       int id = coding_categories[category].id;
8046       Lisp_Object attrs;
8047
8048       if (id < 0)
8049         continue;
8050       attrs = CODING_ID_ATTRS (id);
8051       if (! NILP (highestp))
8052         return CODING_ATTR_BASE_NAME (attrs);
8053       val = Fcons (CODING_ATTR_BASE_NAME (attrs), val);
8054     }
8055   return Fnreverse (val);
8056 }
8057
8058 static char *suffixes[] = { "-unix", "-dos", "-mac" };
8059
8060 static Lisp_Object
8061 make_subsidiaries (base)
8062      Lisp_Object base;
8063 {
8064   Lisp_Object subsidiaries;
8065   int base_name_len = SBYTES (SYMBOL_NAME (base));
8066   char *buf = (char *) alloca (base_name_len + 6);
8067   int i;
8068
8069   bcopy (SDATA (SYMBOL_NAME (base)), buf, base_name_len);
8070   subsidiaries = Fmake_vector (make_number (3), Qnil);
8071   for (i = 0; i < 3; i++)
8072     {
8073       bcopy (suffixes[i], buf + base_name_len, strlen (suffixes[i]) + 1);
8074       ASET (subsidiaries, i, intern (buf));
8075     }
8076   return subsidiaries;
8077 }
8078
8079
8080 DEFUN ("define-coding-system-internal", Fdefine_coding_system_internal,
8081        Sdefine_coding_system_internal, coding_arg_max, MANY, 0,
8082        doc: /* For internal use only.
8083 usage: (define-coding-system-internal ...)  */)
8084      (nargs, args)
8085      int nargs;
8086      Lisp_Object *args;
8087 {
8088   Lisp_Object name;
8089   Lisp_Object spec_vec;         /* [ ATTRS ALIASE EOL_TYPE ] */
8090   Lisp_Object attrs;            /* Vector of attributes.  */
8091   Lisp_Object eol_type;
8092   Lisp_Object aliases;
8093   Lisp_Object coding_type, charset_list, safe_charsets;
8094   enum coding_category category;
8095   Lisp_Object tail, val;
8096   int max_charset_id = 0;
8097   int i;
8098
8099   if (nargs < coding_arg_max)
8100     goto short_args;
8101
8102   attrs = Fmake_vector (make_number (coding_attr_last_index), Qnil);
8103
8104   name = args[coding_arg_name];
8105   CHECK_SYMBOL (name);
8106   CODING_ATTR_BASE_NAME (attrs) = name;
8107
8108   val = args[coding_arg_mnemonic];
8109   if (! STRINGP (val))
8110     CHECK_CHARACTER (val);
8111   CODING_ATTR_MNEMONIC (attrs) = val;
8112
8113   coding_type = args[coding_arg_coding_type];
8114   CHECK_SYMBOL (coding_type);
8115   CODING_ATTR_TYPE (attrs) = coding_type;
8116
8117   charset_list = args[coding_arg_charset_list];
8118   if (SYMBOLP (charset_list))
8119     {
8120       if (EQ (charset_list, Qiso_2022))
8121         {
8122           if (! EQ (coding_type, Qiso_2022))
8123             error ("Invalid charset-list");
8124           charset_list = Viso_2022_charset_list;
8125         }
8126       else if (EQ (charset_list, Qemacs_mule))
8127         {
8128           if (! EQ (coding_type, Qemacs_mule))
8129             error ("Invalid charset-list");
8130           charset_list = Vemacs_mule_charset_list;
8131         }
8132       for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
8133         if (max_charset_id < XFASTINT (XCAR (tail)))
8134           max_charset_id = XFASTINT (XCAR (tail));
8135     }
8136   else
8137     {
8138       charset_list = Fcopy_sequence (charset_list);
8139       for (tail = charset_list; !NILP (tail); tail = Fcdr (tail))
8140         {
8141           struct charset *charset;
8142
8143           val = Fcar (tail);
8144           CHECK_CHARSET_GET_CHARSET (val, charset);
8145           if (EQ (coding_type, Qiso_2022)
8146               ? CHARSET_ISO_FINAL (charset) < 0
8147               : EQ (coding_type, Qemacs_mule)
8148               ? CHARSET_EMACS_MULE_ID (charset) < 0
8149               : 0)
8150             error ("Can't handle charset `%s'",
8151                    SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
8152
8153           XSETCAR (tail, make_number (charset->id));
8154           if (max_charset_id < charset->id)
8155             max_charset_id = charset->id;
8156         }
8157     }
8158   CODING_ATTR_CHARSET_LIST (attrs) = charset_list;
8159
8160   safe_charsets = Fmake_string (make_number (max_charset_id + 1),
8161                                 make_number (255));
8162   for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
8163     SSET (safe_charsets, XFASTINT (XCAR (tail)), 0);
8164   CODING_ATTR_SAFE_CHARSETS (attrs) = safe_charsets;
8165
8166   CODING_ATTR_ASCII_COMPAT (attrs) = args[coding_arg_ascii_compatible_p];
8167
8168   val = args[coding_arg_decode_translation_table];
8169   if (! CHAR_TABLE_P (val))
8170     CHECK_SYMBOL (val);
8171   CODING_ATTR_DECODE_TBL (attrs) = val;
8172
8173   val = args[coding_arg_encode_translation_table];
8174   if (! CHAR_TABLE_P (val))
8175     CHECK_SYMBOL (val);
8176   CODING_ATTR_ENCODE_TBL (attrs) = val;
8177
8178   val = args[coding_arg_post_read_conversion];
8179   CHECK_SYMBOL (val);
8180   CODING_ATTR_POST_READ (attrs) = val;
8181
8182   val = args[coding_arg_pre_write_conversion];
8183   CHECK_SYMBOL (val);
8184   CODING_ATTR_PRE_WRITE (attrs) = val;
8185
8186   val = args[coding_arg_default_char];
8187   if (NILP (val))
8188     CODING_ATTR_DEFAULT_CHAR (attrs) = make_number (' ');
8189   else
8190     {
8191       CHECK_CHARACTER (val);
8192       CODING_ATTR_DEFAULT_CHAR (attrs) = val;
8193     }
8194
8195   val = args[coding_arg_for_unibyte];
8196   CODING_ATTR_FOR_UNIBYTE (attrs) = NILP (val) ? Qnil : Qt;
8197
8198   val = args[coding_arg_plist];
8199   CHECK_LIST (val);
8200   CODING_ATTR_PLIST (attrs) = val;
8201
8202   if (EQ (coding_type, Qcharset))
8203     {
8204       /* Generate a lisp vector of 256 elements.  Each element is nil,
8205          integer, or a list of charset IDs.
8206
8207          If Nth element is nil, the byte code N is invalid in this
8208          coding system.
8209
8210          If Nth element is a number NUM, N is the first byte of a
8211          charset whose ID is NUM.
8212
8213          If Nth element is a list of charset IDs, N is the first byte
8214          of one of them.  The list is sorted by dimensions of the
8215          charsets.  A charset of smaller dimension comes firtst. */
8216       val = Fmake_vector (make_number (256), Qnil);
8217
8218       for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
8219         {
8220           struct charset *charset = CHARSET_FROM_ID (XFASTINT (XCAR (tail)));
8221           int dim = CHARSET_DIMENSION (charset);
8222           int idx = (dim - 1) * 4;
8223
8224           if (CHARSET_ASCII_COMPATIBLE_P (charset))
8225             CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
8226
8227           for (i = charset->code_space[idx];
8228                i <= charset->code_space[idx + 1]; i++)
8229             {
8230               Lisp_Object tmp, tmp2;
8231               int dim2;
8232
8233               tmp = AREF (val, i);
8234               if (NILP (tmp))
8235                 tmp = XCAR (tail);
8236               else if (NUMBERP (tmp))
8237                 {
8238                   dim2 = CHARSET_DIMENSION (CHARSET_FROM_ID (XFASTINT (tmp)));
8239                   if (dim < dim2)
8240                     tmp = Fcons (XCAR (tail), Fcons (tmp, Qnil));
8241                   else
8242                     tmp = Fcons (tmp, Fcons (XCAR (tail), Qnil));
8243                 }
8244               else
8245                 {
8246                   for (tmp2 = tmp; CONSP (tmp2); tmp2 = XCDR (tmp2))
8247                     {
8248                       dim2 = CHARSET_DIMENSION (CHARSET_FROM_ID (XFASTINT (XCAR (tmp2))));
8249                       if (dim < dim2)
8250                         break;
8251                     }
8252                   if (NILP (tmp2))
8253                     tmp = nconc2 (tmp, Fcons (XCAR (tail), Qnil));
8254                   else
8255                     {
8256                       XSETCDR (tmp2, Fcons (XCAR (tmp2), XCDR (tmp2)));
8257                       XSETCAR (tmp2, XCAR (tail));
8258                     }
8259                 }
8260               ASET (val, i, tmp);
8261             }
8262         }
8263       ASET (attrs, coding_attr_charset_valids, val);
8264       category = coding_category_charset;
8265     }
8266   else if (EQ (coding_type, Qccl))
8267     {
8268       Lisp_Object valids;
8269
8270       if (nargs < coding_arg_ccl_max)
8271         goto short_args;
8272
8273       val = args[coding_arg_ccl_decoder];
8274       CHECK_CCL_PROGRAM (val);
8275       if (VECTORP (val))
8276         val = Fcopy_sequence (val);
8277       ASET (attrs, coding_attr_ccl_decoder, val);
8278
8279       val = args[coding_arg_ccl_encoder];
8280       CHECK_CCL_PROGRAM (val);
8281       if (VECTORP (val))
8282         val = Fcopy_sequence (val);
8283       ASET (attrs, coding_attr_ccl_encoder, val);
8284
8285       val = args[coding_arg_ccl_valids];
8286       valids = Fmake_string (make_number (256), make_number (0));
8287       for (tail = val; !NILP (tail); tail = Fcdr (tail))
8288         {
8289           int from, to;
8290
8291           val = Fcar (tail);
8292           if (INTEGERP (val))
8293             {
8294               from = to = XINT (val);
8295               if (from < 0 || from > 255)
8296                 args_out_of_range_3 (val, make_number (0), make_number (255));
8297             }
8298           else
8299             {
8300               CHECK_CONS (val);
8301               CHECK_NATNUM_CAR (val);
8302               CHECK_NATNUM_CDR (val);
8303               from = XINT (XCAR (val));
8304               if (from > 255)
8305                 args_out_of_range_3 (XCAR (val),
8306                                      make_number (0), make_number (255));
8307               to = XINT (XCDR (val));
8308               if (to < from || to > 255)
8309                 args_out_of_range_3 (XCDR (val),
8310                                      XCAR (val), make_number (255));
8311             }
8312           for (i = from; i <= to; i++)
8313             SSET (valids, i, 1);
8314         }
8315       ASET (attrs, coding_attr_ccl_valids, valids);
8316
8317       category = coding_category_ccl;
8318     }
8319   else if (EQ (coding_type, Qutf_16))
8320     {
8321       Lisp_Object bom, endian;
8322
8323       CODING_ATTR_ASCII_COMPAT (attrs) = Qnil;
8324
8325       if (nargs < coding_arg_utf16_max)
8326         goto short_args;
8327
8328       bom = args[coding_arg_utf16_bom];
8329       if (! NILP (bom) && ! EQ (bom, Qt))
8330         {
8331           CHECK_CONS (bom);
8332           val = XCAR (bom);
8333           CHECK_CODING_SYSTEM (val);
8334           val = XCDR (bom);
8335           CHECK_CODING_SYSTEM (val);
8336         }
8337       ASET (attrs, coding_attr_utf_16_bom, bom);
8338
8339       endian = args[coding_arg_utf16_endian];
8340       CHECK_SYMBOL (endian);
8341       if (NILP (endian))
8342         endian = Qbig;
8343       else if (! EQ (endian, Qbig) && ! EQ (endian, Qlittle))
8344         error ("Invalid endian: %s", SDATA (SYMBOL_NAME (endian)));
8345       ASET (attrs, coding_attr_utf_16_endian, endian);
8346
8347       category = (CONSP (bom)
8348                   ? coding_category_utf_16_auto
8349                   : NILP (bom)
8350                   ? (EQ (endian, Qbig)
8351                      ? coding_category_utf_16_be_nosig
8352                      : coding_category_utf_16_le_nosig)
8353                   : (EQ (endian, Qbig)
8354                      ? coding_category_utf_16_be
8355                      : coding_category_utf_16_le));
8356     }
8357   else if (EQ (coding_type, Qiso_2022))
8358     {
8359       Lisp_Object initial, reg_usage, request, flags;
8360       int i;
8361
8362       if (nargs < coding_arg_iso2022_max)
8363         goto short_args;
8364
8365       initial = Fcopy_sequence (args[coding_arg_iso2022_initial]);
8366       CHECK_VECTOR (initial);
8367       for (i = 0; i < 4; i++)
8368         {
8369           val = Faref (initial, make_number (i));
8370           if (! NILP (val))
8371             {
8372               struct charset *charset;
8373
8374               CHECK_CHARSET_GET_CHARSET (val, charset);
8375               ASET (initial, i, make_number (CHARSET_ID (charset)));
8376               if (i == 0 && CHARSET_ASCII_COMPATIBLE_P (charset))
8377                 CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
8378             }
8379           else
8380             ASET (initial, i, make_number (-1));
8381         }
8382
8383       reg_usage = args[coding_arg_iso2022_reg_usage];
8384       CHECK_CONS (reg_usage);
8385       CHECK_NUMBER_CAR (reg_usage);
8386       CHECK_NUMBER_CDR (reg_usage);
8387
8388       request = Fcopy_sequence (args[coding_arg_iso2022_request]);
8389       for (tail = request; ! NILP (tail); tail = Fcdr (tail))
8390         {
8391           int id;
8392           Lisp_Object tmp;
8393
8394           val = Fcar (tail);
8395           CHECK_CONS (val);
8396           tmp = XCAR (val);
8397           CHECK_CHARSET_GET_ID (tmp, id);
8398           CHECK_NATNUM_CDR (val);
8399           if (XINT (XCDR (val)) >= 4)
8400             error ("Invalid graphic register number: %d", XINT (XCDR (val)));
8401           XSETCAR (val, make_number (id));
8402         }
8403
8404       flags = args[coding_arg_iso2022_flags];
8405       CHECK_NATNUM (flags);
8406       i = XINT (flags);
8407       if (EQ (args[coding_arg_charset_list], Qiso_2022))
8408         flags = make_number (i | CODING_ISO_FLAG_FULL_SUPPORT);
8409
8410       ASET (attrs, coding_attr_iso_initial, initial);
8411       ASET (attrs, coding_attr_iso_usage, reg_usage);
8412       ASET (attrs, coding_attr_iso_request, request);
8413       ASET (attrs, coding_attr_iso_flags, flags);
8414       setup_iso_safe_charsets (attrs);
8415
8416       if (i & CODING_ISO_FLAG_SEVEN_BITS)
8417         category = ((i & (CODING_ISO_FLAG_LOCKING_SHIFT
8418                           | CODING_ISO_FLAG_SINGLE_SHIFT))
8419                     ? coding_category_iso_7_else
8420                     : EQ (args[coding_arg_charset_list], Qiso_2022)
8421                     ? coding_category_iso_7
8422                     : coding_category_iso_7_tight);
8423       else
8424         {
8425           int id = XINT (AREF (initial, 1));
8426
8427           category = (((i & CODING_ISO_FLAG_LOCKING_SHIFT)
8428                        || EQ (args[coding_arg_charset_list], Qiso_2022)
8429                        || id < 0)
8430                       ? coding_category_iso_8_else
8431                       : (CHARSET_DIMENSION (CHARSET_FROM_ID (id)) == 1)
8432                       ? coding_category_iso_8_1
8433                       : coding_category_iso_8_2);
8434         }
8435       if (category != coding_category_iso_8_1
8436           && category != coding_category_iso_8_2)
8437         CODING_ATTR_ASCII_COMPAT (attrs) = Qnil;
8438     }
8439   else if (EQ (coding_type, Qemacs_mule))
8440     {
8441       if (EQ (args[coding_arg_charset_list], Qemacs_mule))
8442         ASET (attrs, coding_attr_emacs_mule_full, Qt);
8443       CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
8444       category = coding_category_emacs_mule;
8445     }
8446   else if (EQ (coding_type, Qshift_jis))
8447     {
8448
8449       struct charset *charset;
8450
8451       if (XINT (Flength (charset_list)) != 3
8452           && XINT (Flength (charset_list)) != 4)
8453         error ("There should be three or four charsets");
8454
8455       charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
8456       if (CHARSET_DIMENSION (charset) != 1)
8457         error ("Dimension of charset %s is not one",
8458                SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
8459       if (CHARSET_ASCII_COMPATIBLE_P (charset))
8460         CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
8461
8462       charset_list = XCDR (charset_list);
8463       charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
8464       if (CHARSET_DIMENSION (charset) != 1)
8465         error ("Dimension of charset %s is not one",
8466                SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
8467
8468       charset_list = XCDR (charset_list);
8469       charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
8470       if (CHARSET_DIMENSION (charset) != 2)
8471         error ("Dimension of charset %s is not two",
8472                SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
8473
8474       charset_list = XCDR (charset_list);
8475       charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
8476       if (CHARSET_DIMENSION (charset) != 2)
8477         error ("Dimension of charset %s is not two",
8478                SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
8479
8480       category = coding_category_sjis;
8481       Vsjis_coding_system = name;
8482     }
8483   else if (EQ (coding_type, Qbig5))
8484     {
8485       struct charset *charset;
8486
8487       if (XINT (Flength (charset_list)) != 2)
8488         error ("There should be just two charsets");
8489
8490       charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
8491       if (CHARSET_DIMENSION (charset) != 1)
8492         error ("Dimension of charset %s is not one",
8493                SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
8494       if (CHARSET_ASCII_COMPATIBLE_P (charset))
8495         CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
8496
8497       charset_list = XCDR (charset_list);
8498       charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
8499       if (CHARSET_DIMENSION (charset) != 2)
8500         error ("Dimension of charset %s is not two",
8501                SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
8502
8503       category = coding_category_big5;
8504       Vbig5_coding_system = name;
8505     }
8506   else if (EQ (coding_type, Qraw_text))
8507     {
8508       category = coding_category_raw_text;
8509       CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
8510     }
8511   else if (EQ (coding_type, Qutf_8))
8512     {
8513       category = coding_category_utf_8;
8514       CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
8515     }
8516   else if (EQ (coding_type, Qundecided))
8517     category = coding_category_undecided;
8518   else
8519     error ("Invalid coding system type: %s",
8520            SDATA (SYMBOL_NAME (coding_type)));
8521
8522   CODING_ATTR_CATEGORY (attrs) = make_number (category);
8523   CODING_ATTR_PLIST (attrs)
8524     = Fcons (QCcategory, Fcons (AREF (Vcoding_category_table, category),
8525                                 CODING_ATTR_PLIST (attrs)));
8526
8527   eol_type = args[coding_arg_eol_type];
8528   if (! NILP (eol_type)
8529       && ! EQ (eol_type, Qunix)
8530       && ! EQ (eol_type, Qdos)
8531       && ! EQ (eol_type, Qmac))
8532     error ("Invalid eol-type");
8533
8534   aliases = Fcons (name, Qnil);
8535
8536   if (NILP (eol_type))
8537     {
8538       eol_type = make_subsidiaries (name);
8539       for (i = 0; i < 3; i++)
8540         {
8541           Lisp_Object this_spec, this_name, this_aliases, this_eol_type;
8542
8543           this_name = AREF (eol_type, i);
8544           this_aliases = Fcons (this_name, Qnil);
8545           this_eol_type = (i == 0 ? Qunix : i == 1 ? Qdos : Qmac);
8546           this_spec = Fmake_vector (make_number (3), attrs);
8547           ASET (this_spec, 1, this_aliases);
8548           ASET (this_spec, 2, this_eol_type);
8549           Fputhash (this_name, this_spec, Vcoding_system_hash_table);
8550           Vcoding_system_list = Fcons (this_name, Vcoding_system_list);
8551           Vcoding_system_alist = Fcons (Fcons (Fsymbol_name (this_name), Qnil),
8552                                         Vcoding_system_alist);
8553         }
8554     }
8555
8556   spec_vec = Fmake_vector (make_number (3), attrs);
8557   ASET (spec_vec, 1, aliases);
8558   ASET (spec_vec, 2, eol_type);
8559
8560   Fputhash (name, spec_vec, Vcoding_system_hash_table);
8561   Vcoding_system_list = Fcons (name, Vcoding_system_list);
8562   Vcoding_system_alist = Fcons (Fcons (Fsymbol_name (name), Qnil),
8563                                 Vcoding_system_alist);
8564
8565   {
8566     int id = coding_categories[category].id;
8567
8568     if (id < 0 || EQ (name, CODING_ID_NAME (id)))
8569       setup_coding_system (name, &coding_categories[category]);
8570   }
8571
8572   return Qnil;
8573
8574  short_args:
8575   return Fsignal (Qwrong_number_of_arguments,
8576                   Fcons (intern ("define-coding-system-internal"),
8577                          make_number (nargs)));
8578 }
8579
8580
8581 DEFUN ("define-coding-system-alias", Fdefine_coding_system_alias,
8582        Sdefine_coding_system_alias, 2, 2, 0,
8583        doc: /* Define ALIAS as an alias for CODING-SYSTEM.  */)
8584      (alias, coding_system)
8585      Lisp_Object alias, coding_system;
8586 {
8587   Lisp_Object spec, aliases, eol_type;
8588
8589   CHECK_SYMBOL (alias);
8590   CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
8591   aliases = AREF (spec, 1);
8592   /* ALISES should be a list of length more than zero, and the first
8593      element is a base coding system.  Append ALIAS at the tail of the
8594      list.  */
8595   while (!NILP (XCDR (aliases)))
8596     aliases = XCDR (aliases);
8597   XSETCDR (aliases, Fcons (alias, Qnil));
8598
8599   eol_type = AREF (spec, 2);
8600   if (VECTORP (eol_type))
8601     {
8602       Lisp_Object subsidiaries;
8603       int i;
8604
8605       subsidiaries = make_subsidiaries (alias);
8606       for (i = 0; i < 3; i++)
8607         Fdefine_coding_system_alias (AREF (subsidiaries, i),
8608                                      AREF (eol_type, i));
8609     }
8610
8611   Fputhash (alias, spec, Vcoding_system_hash_table);
8612   Vcoding_system_list = Fcons (alias, Vcoding_system_list);
8613   Vcoding_system_alist = Fcons (Fcons (Fsymbol_name (alias), Qnil),
8614                                 Vcoding_system_alist);
8615
8616   return Qnil;
8617 }
8618
8619 DEFUN ("coding-system-base", Fcoding_system_base, Scoding_system_base,
8620        1, 1, 0,
8621        doc: /* Return the base of CODING-SYSTEM.
8622 Any alias or subsidiary coding system is not a base coding system.  */)
8623   (coding_system)
8624      Lisp_Object coding_system;
8625 {
8626   Lisp_Object spec, attrs;
8627
8628   if (NILP (coding_system))
8629     return (Qno_conversion);
8630   CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
8631   attrs = AREF (spec, 0);
8632   return CODING_ATTR_BASE_NAME (attrs);
8633 }
8634
8635 DEFUN ("coding-system-plist", Fcoding_system_plist, Scoding_system_plist,
8636        1, 1, 0,
8637        doc: "Return the property list of CODING-SYSTEM.")
8638      (coding_system)
8639      Lisp_Object coding_system;
8640 {
8641   Lisp_Object spec, attrs;
8642
8643   if (NILP (coding_system))
8644     coding_system = Qno_conversion;
8645   CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
8646   attrs = AREF (spec, 0);
8647   return CODING_ATTR_PLIST (attrs);
8648 }
8649
8650
8651 DEFUN ("coding-system-aliases", Fcoding_system_aliases, Scoding_system_aliases,
8652        1, 1, 0,
8653        doc: /* Return the list of aliases of CODING-SYSTEM.  */)
8654      (coding_system)
8655      Lisp_Object coding_system;
8656 {
8657   Lisp_Object spec;
8658
8659   if (NILP (coding_system))
8660     coding_system = Qno_conversion;
8661   CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
8662   return AREF (spec, 1);
8663 }
8664
8665 DEFUN ("coding-system-eol-type", Fcoding_system_eol_type,
8666        Scoding_system_eol_type, 1, 1, 0,
8667        doc: /* Return eol-type of CODING-SYSTEM.
8668 An eol-type is integer 0, 1, 2, or a vector of coding systems.
8669
8670 Integer values 0, 1, and 2 indicate a format of end-of-line; LF, CRLF,
8671 and CR respectively.
8672
8673 A vector value indicates that a format of end-of-line should be
8674 detected automatically.  Nth element of the vector is the subsidiary
8675 coding system whose eol-type is N.  */)
8676      (coding_system)
8677      Lisp_Object coding_system;
8678 {
8679   Lisp_Object spec, eol_type;
8680   int n;
8681
8682   if (NILP (coding_system))
8683     coding_system = Qno_conversion;
8684   if (! CODING_SYSTEM_P (coding_system))
8685     return Qnil;
8686   spec = CODING_SYSTEM_SPEC (coding_system);
8687   eol_type = AREF (spec, 2);
8688   if (VECTORP (eol_type))
8689     return Fcopy_sequence (eol_type);
8690   n = EQ (eol_type, Qunix) ? 0 : EQ (eol_type, Qdos) ? 1 : 2;
8691   return make_number (n);
8692 }
8693
8694 #endif /* emacs */
8695
8696 \f
8697 /*** 9. Post-amble ***/
8698
8699 void
8700 init_coding_once ()
8701 {
8702   int i;
8703
8704   for (i = 0; i < coding_category_max; i++)
8705     {
8706       coding_categories[i].id = -1;
8707       coding_priorities[i] = i;
8708     }
8709
8710   /* ISO2022 specific initialize routine.  */
8711   for (i = 0; i < 0x20; i++)
8712     iso_code_class[i] = ISO_control_0;
8713   for (i = 0x21; i < 0x7F; i++)
8714     iso_code_class[i] = ISO_graphic_plane_0;
8715   for (i = 0x80; i < 0xA0; i++)
8716     iso_code_class[i] = ISO_control_1;
8717   for (i = 0xA1; i < 0xFF; i++)
8718     iso_code_class[i] = ISO_graphic_plane_1;
8719   iso_code_class[0x20] = iso_code_class[0x7F] = ISO_0x20_or_0x7F;
8720   iso_code_class[0xA0] = iso_code_class[0xFF] = ISO_0xA0_or_0xFF;
8721   iso_code_class[ISO_CODE_SO] = ISO_shift_out;
8722   iso_code_class[ISO_CODE_SI] = ISO_shift_in;
8723   iso_code_class[ISO_CODE_SS2_7] = ISO_single_shift_2_7;
8724   iso_code_class[ISO_CODE_ESC] = ISO_escape;
8725   iso_code_class[ISO_CODE_SS2] = ISO_single_shift_2;
8726   iso_code_class[ISO_CODE_SS3] = ISO_single_shift_3;
8727   iso_code_class[ISO_CODE_CSI] = ISO_control_sequence_introducer;
8728
8729   for (i = 0; i < 256; i++)
8730     {
8731       emacs_mule_bytes[i] = 1;
8732     }
8733   emacs_mule_bytes[EMACS_MULE_LEADING_CODE_PRIVATE_11] = 3;
8734   emacs_mule_bytes[EMACS_MULE_LEADING_CODE_PRIVATE_12] = 3;
8735   emacs_mule_bytes[EMACS_MULE_LEADING_CODE_PRIVATE_21] = 4;
8736   emacs_mule_bytes[EMACS_MULE_LEADING_CODE_PRIVATE_22] = 4;
8737 }
8738
8739 #ifdef emacs
8740
8741 void
8742 syms_of_coding ()
8743 {
8744   staticpro (&Vcoding_system_hash_table);
8745   {
8746     Lisp_Object args[2];
8747     args[0] = QCtest;
8748     args[1] = Qeq;
8749     Vcoding_system_hash_table = Fmake_hash_table (2, args);
8750   }
8751
8752   staticpro (&Vsjis_coding_system);
8753   Vsjis_coding_system = Qnil;
8754
8755   staticpro (&Vbig5_coding_system);
8756   Vbig5_coding_system = Qnil;
8757
8758   staticpro (&Vcode_conversion_reused_workbuf);
8759   Vcode_conversion_reused_workbuf = Qnil;
8760
8761   staticpro (&Vcode_conversion_workbuf_name);
8762   Vcode_conversion_workbuf_name = build_string (" *code-conversion-work*");
8763
8764   reused_workbuf_in_use = 0;
8765
8766   DEFSYM (Qcharset, "charset");
8767   DEFSYM (Qtarget_idx, "target-idx");
8768   DEFSYM (Qcoding_system_history, "coding-system-history");
8769   Fset (Qcoding_system_history, Qnil);
8770
8771   /* Target FILENAME is the first argument.  */
8772   Fput (Qinsert_file_contents, Qtarget_idx, make_number (0));
8773   /* Target FILENAME is the third argument.  */
8774   Fput (Qwrite_region, Qtarget_idx, make_number (2));
8775
8776   DEFSYM (Qcall_process, "call-process");
8777   /* Target PROGRAM is the first argument.  */
8778   Fput (Qcall_process, Qtarget_idx, make_number (0));
8779
8780   DEFSYM (Qcall_process_region, "call-process-region");
8781   /* Target PROGRAM is the third argument.  */
8782   Fput (Qcall_process_region, Qtarget_idx, make_number (2));
8783
8784   DEFSYM (Qstart_process, "start-process");
8785   /* Target PROGRAM is the third argument.  */
8786   Fput (Qstart_process, Qtarget_idx, make_number (2));
8787
8788   DEFSYM (Qopen_network_stream, "open-network-stream");
8789   /* Target SERVICE is the fourth argument.  */
8790   Fput (Qopen_network_stream, Qtarget_idx, make_number (3));
8791
8792   DEFSYM (Qcoding_system, "coding-system");
8793   DEFSYM (Qcoding_aliases, "coding-aliases");
8794
8795   DEFSYM (Qeol_type, "eol-type");
8796   DEFSYM (Qunix, "unix");
8797   DEFSYM (Qdos, "dos");
8798
8799   DEFSYM (Qbuffer_file_coding_system, "buffer-file-coding-system");
8800   DEFSYM (Qpost_read_conversion, "post-read-conversion");
8801   DEFSYM (Qpre_write_conversion, "pre-write-conversion");
8802   DEFSYM (Qdefault_char, "default-char");
8803   DEFSYM (Qundecided, "undecided");
8804   DEFSYM (Qno_conversion, "no-conversion");
8805   DEFSYM (Qraw_text, "raw-text");
8806
8807   DEFSYM (Qiso_2022, "iso-2022");
8808
8809   DEFSYM (Qutf_8, "utf-8");
8810   DEFSYM (Qutf_8_emacs, "utf-8-emacs");
8811
8812   DEFSYM (Qutf_16, "utf-16");
8813   DEFSYM (Qbig, "big");
8814   DEFSYM (Qlittle, "little");
8815
8816   DEFSYM (Qshift_jis, "shift-jis");
8817   DEFSYM (Qbig5, "big5");
8818
8819   DEFSYM (Qcoding_system_p, "coding-system-p");
8820
8821   DEFSYM (Qcoding_system_error, "coding-system-error");
8822   Fput (Qcoding_system_error, Qerror_conditions,
8823         Fcons (Qcoding_system_error, Fcons (Qerror, Qnil)));
8824   Fput (Qcoding_system_error, Qerror_message,
8825         build_string ("Invalid coding system"));
8826
8827   /* Intern this now in case it isn't already done.
8828      Setting this variable twice is harmless.
8829      But don't staticpro it here--that is done in alloc.c.  */
8830   Qchar_table_extra_slots = intern ("char-table-extra-slots");
8831
8832   DEFSYM (Qtranslation_table, "translation-table");
8833   Fput (Qtranslation_table, Qchar_table_extra_slots, make_number (1));
8834   DEFSYM (Qtranslation_table_id, "translation-table-id");
8835   DEFSYM (Qtranslation_table_for_decode, "translation-table-for-decode");
8836   DEFSYM (Qtranslation_table_for_encode, "translation-table-for-encode");
8837
8838   DEFSYM (Qvalid_codes, "valid-codes");
8839
8840   DEFSYM (Qemacs_mule, "emacs-mule");
8841
8842   DEFSYM (QCcategory, ":category");
8843
8844   Vcoding_category_table
8845     = Fmake_vector (make_number (coding_category_max), Qnil);
8846   staticpro (&Vcoding_category_table);
8847   /* Followings are target of code detection.  */
8848   ASET (Vcoding_category_table, coding_category_iso_7,
8849         intern ("coding-category-iso-7"));
8850   ASET (Vcoding_category_table, coding_category_iso_7_tight,
8851         intern ("coding-category-iso-7-tight"));
8852   ASET (Vcoding_category_table, coding_category_iso_8_1,
8853         intern ("coding-category-iso-8-1"));
8854   ASET (Vcoding_category_table, coding_category_iso_8_2,
8855         intern ("coding-category-iso-8-2"));
8856   ASET (Vcoding_category_table, coding_category_iso_7_else,
8857         intern ("coding-category-iso-7-else"));
8858   ASET (Vcoding_category_table, coding_category_iso_8_else,
8859         intern ("coding-category-iso-8-else"));
8860   ASET (Vcoding_category_table, coding_category_utf_8,
8861         intern ("coding-category-utf-8"));
8862   ASET (Vcoding_category_table, coding_category_utf_16_be,
8863         intern ("coding-category-utf-16-be"));
8864   ASET (Vcoding_category_table, coding_category_utf_16_auto,
8865         intern ("coding-category-utf-16-auto"));
8866   ASET (Vcoding_category_table, coding_category_utf_16_le,
8867         intern ("coding-category-utf-16-le"));
8868   ASET (Vcoding_category_table, coding_category_utf_16_be_nosig,
8869         intern ("coding-category-utf-16-be-nosig"));
8870   ASET (Vcoding_category_table, coding_category_utf_16_le_nosig,
8871         intern ("coding-category-utf-16-le-nosig"));
8872   ASET (Vcoding_category_table, coding_category_charset,
8873         intern ("coding-category-charset"));
8874   ASET (Vcoding_category_table, coding_category_sjis,
8875         intern ("coding-category-sjis"));
8876   ASET (Vcoding_category_table, coding_category_big5,
8877         intern ("coding-category-big5"));
8878   ASET (Vcoding_category_table, coding_category_ccl,
8879         intern ("coding-category-ccl"));
8880   ASET (Vcoding_category_table, coding_category_emacs_mule,
8881         intern ("coding-category-emacs-mule"));
8882   /* Followings are NOT target of code detection.  */
8883   ASET (Vcoding_category_table, coding_category_raw_text,
8884         intern ("coding-category-raw-text"));
8885   ASET (Vcoding_category_table, coding_category_undecided,
8886         intern ("coding-category-undecided"));
8887
8888   DEFSYM (Qinsufficient_source, "insufficient-source");
8889   DEFSYM (Qinconsistent_eol, "inconsistent-eol");
8890   DEFSYM (Qinvalid_source, "invalid-source");
8891   DEFSYM (Qinterrupted, "interrupted");
8892   DEFSYM (Qinsufficient_memory, "insufficient-memory");
8893
8894   defsubr (&Scoding_system_p);
8895   defsubr (&Sread_coding_system);
8896   defsubr (&Sread_non_nil_coding_system);
8897   defsubr (&Scheck_coding_system);
8898   defsubr (&Sdetect_coding_region);
8899   defsubr (&Sdetect_coding_string);
8900   defsubr (&Sfind_coding_systems_region_internal);
8901   defsubr (&Sunencodable_char_position);
8902   defsubr (&Scheck_coding_systems_region);
8903   defsubr (&Sdecode_coding_region);
8904   defsubr (&Sencode_coding_region);
8905   defsubr (&Sdecode_coding_string);
8906   defsubr (&Sencode_coding_string);
8907   defsubr (&Sdecode_sjis_char);
8908   defsubr (&Sencode_sjis_char);
8909   defsubr (&Sdecode_big5_char);
8910   defsubr (&Sencode_big5_char);
8911   defsubr (&Sset_terminal_coding_system_internal);
8912   defsubr (&Sset_safe_terminal_coding_system_internal);
8913   defsubr (&Sterminal_coding_system);
8914   defsubr (&Sset_keyboard_coding_system_internal);
8915   defsubr (&Skeyboard_coding_system);
8916   defsubr (&Sfind_operation_coding_system);
8917   defsubr (&Sset_coding_system_priority);
8918   defsubr (&Sdefine_coding_system_internal);
8919   defsubr (&Sdefine_coding_system_alias);
8920   defsubr (&Scoding_system_base);
8921   defsubr (&Scoding_system_plist);
8922   defsubr (&Scoding_system_aliases);
8923   defsubr (&Scoding_system_eol_type);
8924   defsubr (&Scoding_system_priority_list);
8925
8926   DEFVAR_LISP ("coding-system-list", &Vcoding_system_list,
8927                doc: /* List of coding systems.
8928
8929 Do not alter the value of this variable manually.  This variable should be
8930 updated by the functions `define-coding-system' and
8931 `define-coding-system-alias'.  */);
8932   Vcoding_system_list = Qnil;
8933
8934   DEFVAR_LISP ("coding-system-alist", &Vcoding_system_alist,
8935                doc: /* Alist of coding system names.
8936 Each element is one element list of coding system name.
8937 This variable is given to `completing-read' as TABLE argument.
8938
8939 Do not alter the value of this variable manually.  This variable should be
8940 updated by the functions `make-coding-system' and
8941 `define-coding-system-alias'.  */);
8942   Vcoding_system_alist = Qnil;
8943
8944   DEFVAR_LISP ("coding-category-list", &Vcoding_category_list,
8945                doc: /* List of coding-categories (symbols) ordered by priority.
8946
8947 On detecting a coding system, Emacs tries code detection algorithms
8948 associated with each coding-category one by one in this order.  When
8949 one algorithm agrees with a byte sequence of source text, the coding
8950 system bound to the corresponding coding-category is selected.  */);
8951   {
8952     int i;
8953
8954     Vcoding_category_list = Qnil;
8955     for (i = coding_category_max - 1; i >= 0; i--)
8956       Vcoding_category_list
8957         = Fcons (XVECTOR (Vcoding_category_table)->contents[i],
8958                  Vcoding_category_list);
8959   }
8960
8961   DEFVAR_LISP ("coding-system-for-read", &Vcoding_system_for_read,
8962                doc: /* Specify the coding system for read operations.
8963 It is useful to bind this variable with `let', but do not set it globally.
8964 If the value is a coding system, it is used for decoding on read operation.
8965 If not, an appropriate element is used from one of the coding system alists:
8966 There are three such tables, `file-coding-system-alist',
8967 `process-coding-system-alist', and `network-coding-system-alist'.  */);
8968   Vcoding_system_for_read = Qnil;
8969
8970   DEFVAR_LISP ("coding-system-for-write", &Vcoding_system_for_write,
8971                doc: /* Specify the coding system for write operations.
8972 Programs bind this variable with `let', but you should not set it globally.
8973 If the value is a coding system, it is used for encoding of output,
8974 when writing it to a file and when sending it to a file or subprocess.
8975
8976 If this does not specify a coding system, an appropriate element
8977 is used from one of the coding system alists:
8978 There are three such tables, `file-coding-system-alist',
8979 `process-coding-system-alist', and `network-coding-system-alist'.
8980 For output to files, if the above procedure does not specify a coding system,
8981 the value of `buffer-file-coding-system' is used.  */);
8982   Vcoding_system_for_write = Qnil;
8983
8984   DEFVAR_LISP ("last-coding-system-used", &Vlast_coding_system_used,
8985                doc: /*
8986 Coding system used in the latest file or process I/O.  */);
8987   Vlast_coding_system_used = Qnil;
8988
8989   DEFVAR_LISP ("last-code-conversion-error", &Vlast_code_conversion_error,
8990                doc: /*
8991 Error status of the last code conversion.
8992
8993 When an error was detected in the last code conversion, this variable
8994 is set to one of the following symbols.
8995   `insufficient-source'
8996   `inconsistent-eol'
8997   `invalid-source'
8998   `interrupted'
8999   `insufficient-memory'
9000 When no error was detected, the value doesn't change.  So, to check
9001 the error status of a code conversion by this variable, you must
9002 explicitly set this variable to nil before performing code
9003 conversion.  */);
9004   Vlast_code_conversion_error = Qnil;
9005
9006   DEFVAR_BOOL ("inhibit-eol-conversion", &inhibit_eol_conversion,
9007                doc: /*
9008 *Non-nil means always inhibit code conversion of end-of-line format.
9009 See info node `Coding Systems' and info node `Text and Binary' concerning
9010 such conversion.  */);
9011   inhibit_eol_conversion = 0;
9012
9013   DEFVAR_BOOL ("inherit-process-coding-system", &inherit_process_coding_system,
9014                doc: /*
9015 Non-nil means process buffer inherits coding system of process output.
9016 Bind it to t if the process output is to be treated as if it were a file
9017 read from some filesystem.  */);
9018   inherit_process_coding_system = 0;
9019
9020   DEFVAR_LISP ("file-coding-system-alist", &Vfile_coding_system_alist,
9021                doc: /*
9022 Alist to decide a coding system to use for a file I/O operation.
9023 The format is ((PATTERN . VAL) ...),
9024 where PATTERN is a regular expression matching a file name,
9025 VAL is a coding system, a cons of coding systems, or a function symbol.
9026 If VAL is a coding system, it is used for both decoding and encoding
9027 the file contents.
9028 If VAL is a cons of coding systems, the car part is used for decoding,
9029 and the cdr part is used for encoding.
9030 If VAL is a function symbol, the function must return a coding system
9031 or a cons of coding systems which are used as above.  The function gets
9032 the arguments with which `find-operation-coding-systems' was called.
9033
9034 See also the function `find-operation-coding-system'
9035 and the variable `auto-coding-alist'.  */);
9036   Vfile_coding_system_alist = Qnil;
9037
9038   DEFVAR_LISP ("process-coding-system-alist", &Vprocess_coding_system_alist,
9039                doc: /*
9040 Alist to decide a coding system to use for a process I/O operation.
9041 The format is ((PATTERN . VAL) ...),
9042 where PATTERN is a regular expression matching a program name,
9043 VAL is a coding system, a cons of coding systems, or a function symbol.
9044 If VAL is a coding system, it is used for both decoding what received
9045 from the program and encoding what sent to the program.
9046 If VAL is a cons of coding systems, the car part is used for decoding,
9047 and the cdr part is used for encoding.
9048 If VAL is a function symbol, the function must return a coding system
9049 or a cons of coding systems which are used as above.
9050
9051 See also the function `find-operation-coding-system'.  */);
9052   Vprocess_coding_system_alist = Qnil;
9053
9054   DEFVAR_LISP ("network-coding-system-alist", &Vnetwork_coding_system_alist,
9055                doc: /*
9056 Alist to decide a coding system to use for a network I/O operation.
9057 The format is ((PATTERN . VAL) ...),
9058 where PATTERN is a regular expression matching a network service name
9059 or is a port number to connect to,
9060 VAL is a coding system, a cons of coding systems, or a function symbol.
9061 If VAL is a coding system, it is used for both decoding what received
9062 from the network stream and encoding what sent to the network stream.
9063 If VAL is a cons of coding systems, the car part is used for decoding,
9064 and the cdr part is used for encoding.
9065 If VAL is a function symbol, the function must return a coding system
9066 or a cons of coding systems which are used as above.
9067
9068 See also the function `find-operation-coding-system'.  */);
9069   Vnetwork_coding_system_alist = Qnil;
9070
9071   DEFVAR_LISP ("locale-coding-system", &Vlocale_coding_system,
9072                doc: /* Coding system to use with system messages.
9073 Also used for decoding keyboard input on X Window system.  */);
9074   Vlocale_coding_system = Qnil;
9075
9076   /* The eol mnemonics are reset in startup.el system-dependently.  */
9077   DEFVAR_LISP ("eol-mnemonic-unix", &eol_mnemonic_unix,
9078                doc: /*
9079 *String displayed in mode line for UNIX-like (LF) end-of-line format.  */);
9080   eol_mnemonic_unix = build_string (":");
9081
9082   DEFVAR_LISP ("eol-mnemonic-dos", &eol_mnemonic_dos,
9083                doc: /*
9084 *String displayed in mode line for DOS-like (CRLF) end-of-line format.  */);
9085   eol_mnemonic_dos = build_string ("\\");
9086
9087   DEFVAR_LISP ("eol-mnemonic-mac", &eol_mnemonic_mac,
9088                doc: /*
9089 *String displayed in mode line for MAC-like (CR) end-of-line format.  */);
9090   eol_mnemonic_mac = build_string ("/");
9091
9092   DEFVAR_LISP ("eol-mnemonic-undecided", &eol_mnemonic_undecided,
9093                doc: /*
9094 *String displayed in mode line when end-of-line format is not yet determined.  */);
9095   eol_mnemonic_undecided = build_string (":");
9096
9097   DEFVAR_LISP ("enable-character-translation", &Venable_character_translation,
9098                doc: /*
9099 *Non-nil enables character translation while encoding and decoding.  */);
9100   Venable_character_translation = Qt;
9101
9102   DEFVAR_LISP ("standard-translation-table-for-decode",
9103                &Vstandard_translation_table_for_decode,
9104                doc: /* Table for translating characters while decoding.  */);
9105   Vstandard_translation_table_for_decode = Qnil;
9106
9107   DEFVAR_LISP ("standard-translation-table-for-encode",
9108                &Vstandard_translation_table_for_encode,
9109                doc: /* Table for translating characters while encoding.  */);
9110   Vstandard_translation_table_for_encode = Qnil;
9111
9112   DEFVAR_LISP ("charset-revision-table", &Vcharset_revision_table,
9113                doc: /* Alist of charsets vs revision numbers.
9114 While encoding, if a charset (car part of an element) is found,
9115 designate it with the escape sequence identifying revision (cdr part
9116 of the element).  */);
9117   Vcharset_revision_table = Qnil;
9118
9119   DEFVAR_LISP ("default-process-coding-system",
9120                &Vdefault_process_coding_system,
9121                doc: /* Cons of coding systems used for process I/O by default.
9122 The car part is used for decoding a process output,
9123 the cdr part is used for encoding a text to be sent to a process.  */);
9124   Vdefault_process_coding_system = Qnil;
9125
9126   DEFVAR_LISP ("latin-extra-code-table", &Vlatin_extra_code_table,
9127                doc: /*
9128 Table of extra Latin codes in the range 128..159 (inclusive).
9129 This is a vector of length 256.
9130 If Nth element is non-nil, the existence of code N in a file
9131 \(or output of subprocess) doesn't prevent it to be detected as
9132 a coding system of ISO 2022 variant which has a flag
9133 `accept-latin-extra-code' t (e.g. iso-latin-1) on reading a file
9134 or reading output of a subprocess.
9135 Only 128th through 159th elements has a meaning.  */);
9136   Vlatin_extra_code_table = Fmake_vector (make_number (256), Qnil);
9137
9138   DEFVAR_LISP ("select-safe-coding-system-function",
9139                &Vselect_safe_coding_system_function,
9140                doc: /*
9141 Function to call to select safe coding system for encoding a text.
9142
9143 If set, this function is called to force a user to select a proper
9144 coding system which can encode the text in the case that a default
9145 coding system used in each operation can't encode the text.
9146
9147 The default value is `select-safe-coding-system' (which see).  */);
9148   Vselect_safe_coding_system_function = Qnil;
9149
9150   DEFVAR_BOOL ("coding-system-require-warning",
9151                &coding_system_require_warning,
9152                doc: /* Internal use only.
9153 If non-nil, on writing a file, `select-safe-coding-system-function' is
9154 called even if `coding-system-for-write' is non-nil.  The command
9155 `universal-coding-system-argument' binds this variable to t temporarily.  */);
9156   coding_system_require_warning = 0;
9157
9158
9159   DEFVAR_BOOL ("inhibit-iso-escape-detection",
9160                &inhibit_iso_escape_detection,
9161                doc: /*
9162 If non-nil, Emacs ignores ISO2022's escape sequence on code detection.
9163
9164 By default, on reading a file, Emacs tries to detect how the text is
9165 encoded.  This code detection is sensitive to escape sequences.  If
9166 the sequence is valid as ISO2022, the code is determined as one of
9167 the ISO2022 encodings, and the file is decoded by the corresponding
9168 coding system (e.g. `iso-2022-7bit').
9169
9170 However, there may be a case that you want to read escape sequences in
9171 a file as is.  In such a case, you can set this variable to non-nil.
9172 Then, as the code detection ignores any escape sequences, no file is
9173 detected as encoded in some ISO2022 encoding.  The result is that all
9174 escape sequences become visible in a buffer.
9175
9176 The default value is nil, and it is strongly recommended not to change
9177 it.  That is because many Emacs Lisp source files that contain
9178 non-ASCII characters are encoded by the coding system `iso-2022-7bit'
9179 in Emacs's distribution, and they won't be decoded correctly on
9180 reading if you suppress escape sequence detection.
9181
9182 The other way to read escape sequences in a file without decoding is
9183 to explicitly specify some coding system that doesn't use ISO2022's
9184 escape sequence (e.g `latin-1') on reading by \\[universal-coding-system-argument].  */);
9185   inhibit_iso_escape_detection = 0;
9186
9187   DEFVAR_LISP ("translation-table-for-input", &Vtranslation_table_for_input,
9188                doc: /* Char table for translating self-inserting characters.
9189 This is applied to the result of input methods, not their input.  See also
9190 `keyboard-translate-table'.  */);
9191     Vtranslation_table_for_input = Qnil;
9192
9193   {
9194     Lisp_Object args[coding_arg_max];
9195     Lisp_Object plist[16];
9196     int i;
9197
9198     for (i = 0; i < coding_arg_max; i++)
9199       args[i] = Qnil;
9200
9201     plist[0] = intern (":name");
9202     plist[1] = args[coding_arg_name] = Qno_conversion;
9203     plist[2] = intern (":mnemonic");
9204     plist[3] = args[coding_arg_mnemonic] = make_number ('=');
9205     plist[4] = intern (":coding-type");
9206     plist[5] = args[coding_arg_coding_type] = Qraw_text;
9207     plist[6] = intern (":ascii-compatible-p");
9208     plist[7] = args[coding_arg_ascii_compatible_p] = Qt;
9209     plist[8] = intern (":default-char");
9210     plist[9] = args[coding_arg_default_char] = make_number (0);
9211     plist[10] = intern (":for-unibyte");
9212     plist[11] = args[coding_arg_for_unibyte] = Qt;
9213     plist[12] = intern (":docstring");
9214     plist[13] = build_string ("Do no conversion.\n\
9215 \n\
9216 When you visit a file with this coding, the file is read into a\n\
9217 unibyte buffer as is, thus each byte of a file is treated as a\n\
9218 character.");
9219     plist[14] = intern (":eol-type");
9220     plist[15] = args[coding_arg_eol_type] = Qunix;
9221     args[coding_arg_plist] = Flist (16, plist);
9222     Fdefine_coding_system_internal (coding_arg_max, args);
9223   }
9224
9225   setup_coding_system (Qno_conversion, &keyboard_coding);
9226   setup_coding_system (Qno_conversion, &terminal_coding);
9227   setup_coding_system (Qno_conversion, &safe_terminal_coding);
9228
9229   {
9230     int i;
9231
9232     for (i = 0; i < coding_category_max; i++)
9233       Fset (AREF (Vcoding_category_table, i), Qno_conversion);
9234   }
9235 }
9236
9237 char *
9238 emacs_strerror (error_number)
9239      int error_number;
9240 {
9241   char *str;
9242
9243   synchronize_system_messages_locale ();
9244   str = strerror (error_number);
9245
9246   if (! NILP (Vlocale_coding_system))
9247     {
9248       Lisp_Object dec = code_convert_string_norecord (build_string (str),
9249                                                       Vlocale_coding_system,
9250                                                       0);
9251       str = (char *) SDATA (dec);
9252     }
9253
9254   return str;
9255 }
9256
9257 #endif /* emacs */