src/coding.c

   1 /* Coding system handler (conversion, detection, etc).
   2    Copyright (C) 1995, 1997, 1998 Electrotechnical Laboratory, JAPAN.
   3      Licensed to the Free Software Foundation.
   4    Copyright (C) 2001, 2002 Free Software Foundation, Inc.
   5    Copyright (C) 2003
   6      National Institute of Advanced Industrial Science and Technology (AIST)
   7      Registration Number H13PRO009
   8
   9 This file is part of GNU Emacs.
  10
  11 GNU Emacs is free software; you can redistribute it and/or modify
  12 it under the terms of the GNU General Public License as published by
  13 the Free Software Foundation; either version 2, or (at your option)
  14 any later version.
  15
  16 GNU Emacs is distributed in the hope that it will be useful,
  17 but WITHOUT ANY WARRANTY; without even the implied warranty of
  18 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  19 GNU General Public License for more details.
  20
  21 You should have received a copy of the GNU General Public License
  22 along with GNU Emacs; see the file COPYING.  If not, write to
  23 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
  24 Boston, MA 02111-1307, USA.  */
  25
  26 /*** TABLE OF CONTENTS ***
  27
  28   0. General comments
  29   1. Preamble
  30   2. Emacs' internal format (emacs-utf-8) handlers
  31   3. UTF-8 handlers
  32   4. UTF-16 handlers
  33   5. Charset-base coding systems handlers
  34   6. emacs-mule (old Emacs' internal format) handlers
  35   7. ISO2022 handlers
  36   8. Shift-JIS and BIG5 handlers
  37   9. CCL handlers
  38   10. C library functions
  39   11. Emacs Lisp library functions
  40   12. Postamble
  41
  42 */
  43
  44 /*** 0. General comments ***
  45
  46
  47 CODING SYSTEM
  48
  49   A coding system is an object for an encoding mechanism that contains
  50   information about how to convert byte sequences to character
  51   sequences and vice versa.  When we say "decode", it means converting
  52   a byte sequence of a specific coding system into a character
  53   sequence that is represented by Emacs' internal coding system
  54   `emacs-utf-8', and when we say "encode", it means converting a
  55   character sequence of emacs-utf-8 to a byte sequence of a specific
  56   coding system.
  57
  58   In Emacs Lisp, a coding system is represented by a Lisp symbol.  At
  59   the C level, a coding system is represented by a vector of attributes
  60   stored in the hash table Vcoding_system_hash_table.  The conversion
  61   from a coding system symbol to its attributes vector is done by
  62   looking up Vcoding_system_hash_table with the symbol.
  63
  64   Coding systems are classified into the following types depending on
  65   the encoding mechanism.  Here's a brief description of the types.
  66
  67   o UTF-8
  68
  69   o UTF-16
  70
  71   o Charset-base coding system
  72
  73   A coding system defined by one or more (coded) character sets.
  74   Decoding and encoding are done by a code converter defined for each
  75   character set.
  76
  77   o Old Emacs internal format (emacs-mule)
  78
  79   The coding system adopted by old versions of Emacs (20 and 21).
  80
  81   o ISO2022-base coding system
  82
  83   The most famous coding system for multiple character sets.  X's
  84   Compound Text, various EUCs (Extended Unix Code), and coding systems
  85   used in the Internet communication such as ISO-2022-JP are all
  86   variants of ISO2022.
  87
  88   o SJIS (or Shift-JIS or MS-Kanji-Code)
  89
  90   A coding system to encode character sets: ASCII, JISX0201, and
  91   JISX0208.  Widely used for PC's in Japan.  Details are described in
  92   section 8.
  93
  94   o BIG5
  95
  96   A coding system to encode character sets: ASCII and Big5.  Widely
  97   used for Chinese (mainly in Taiwan and Hong Kong).  Details are
  98   described in section 8.  In this file, when we write "big5" (all
  99   lowercase), we mean the coding system, and when we write "Big5"
 100   (capitalized), we mean the character set.
 101
 102   o CCL
 103
 104   If a user wants to decode/encode text encoded in a coding system
 105   not listed above, he can supply a decoder and an encoder for it in
 106   CCL (Code Conversion Language) programs.  Emacs executes the CCL
 107   program while decoding/encoding.
 108
 109   o Raw-text
 110
 111   A coding system for text containing raw eight-bit data.  Emacs
 112   treats each byte of source text as a character (except for
 113   end-of-line conversion).
 114
 115   o No-conversion
 116
 117   Like raw text, but don't do end-of-line conversion.
 118
 119
 120 END-OF-LINE FORMAT
 121
 122   How text end-of-line is encoded depends on operating system.  For
 123   instance, Unix's format is just one byte of LF (line-feed) code,
 124   whereas DOS's format is two-byte sequence of `carriage-return' and
 125   `line-feed' codes.  MacOS's format is usually one byte of
 126   `carriage-return'.
 127
 128   Since text character encoding and end-of-line encoding are
 129   independent, any coding system described above can take any format
 130   of end-of-line (except for no-conversion).
 131
 132 STRUCT CODING_SYSTEM
 133
 134   Before using a coding system for code conversion (i.e. decoding and
 135   encoding), we set up a structure of type `struct coding_system'.
 136   This structure keeps various information about a specific code
 137   conversion (e.g. the location of source and destination data).
 138
 139 */
 140
 141 /* COMMON MACROS */
 142
 143
 144 /*** GENERAL NOTES on `detect_coding_XXX ()' functions ***
 145
 146   These functions check if a byte sequence specified as a source in
 147   CODING conforms to the format of XXX, and update the members of
 148   DETECT_INFO.
 149
 150   Return 1 if the byte sequence conforms to XXX, otherwise return 0.
 151
 152   Below is the template of these functions.  */
 153
 154 #if 0
 155 static int
 156 detect_coding_XXX (coding, detect_info)
 157      struct coding_system *coding;
 158      struct coding_detection_info *detect_info;
 159 {
 160   unsigned char *src = coding->source;
 161   unsigned char *src_end = coding->source + coding->src_bytes;
 162   int multibytep = coding->src_multibyte;
 163   int consumed_chars = 0;
 164   int found = 0;
 165   ...;
 166
 167   while (1)
 168     {
 169       /* Get one byte from the source.  If the source is exhausted, jump
 170          to no_more_source:.  */
 171       ONE_MORE_BYTE (c);
 172
 173       if (! __C_conforms_to_XXX___ (c))
 174         break;
 175       if (! __C_strongly_suggests_XXX__ (c))
 176         found = CATEGORY_MASK_XXX;
 177     }
 178   /* The byte sequence is invalid for XXX.  */
 179   detect_info->rejected |= CATEGORY_MASK_XXX;
 180   return 0;
 181
 182  no_more_source:
 183   /* The source was exhausted successfully.  */
 184   detect_info->found |= found;
 185   return 1;
 186 }
 187 #endif
 188
 189 /*** GENERAL NOTES on `decode_coding_XXX ()' functions ***
 190
 191   These functions decode a byte sequence specified as a source by
 192   CODING.  The resulting multibyte text goes to a place pointed to by
 193   CODING->charbuf, the length of which should not exceed
 194   CODING->charbuf_size;
 195
 196   These functions set the information of original and decoded texts in
 197   CODING->consumed, CODING->consumed_char, and CODING->charbuf_used.
 198   They also set CODING->result to one of CODING_RESULT_XXX indicating
 199   how the decoding is finished.
 200
 201   Below is the template of these functions.  */
 202
 203 #if 0
 204 static void
 205 decode_coding_XXXX (coding)
 206      struct coding_system *coding;
 207 {
 208   unsigned char *src = coding->source + coding->consumed;
 209   unsigned char *src_end = coding->source + coding->src_bytes;
 210   /* SRC_BASE remembers the start position in source in each loop.
 211      The loop will be exited when there's not enough source bytes, or
 212      when there's no room in CHARBUF for a decoded character.  */
 213   unsigned char *src_base;
 214   /* A buffer to produce decoded characters.  */
 215   int *charbuf = coding->charbuf + coding->charbuf_used;
 216   int *charbuf_end = coding->charbuf + coding->charbuf_size;
 217   int multibytep = coding->src_multibyte;
 218
 219   while (1)
 220     {
 221       src_base = src;
 222       if (charbuf >= charbuf_end)
 223         /* No more room to produce a decoded character.  */
 224         break;
 225       ONE_MORE_BYTE (c);
 226       /* Decode C and store the result in CHARBUF.  */
 227     }
 228
 229  no_more_source:
 230   if (src_base < src_end
 231       && coding->mode & CODING_MODE_LAST_BLOCK)
 232     /* If the source ends by partial bytes to construct a character,
 233        treat them as eight-bit raw data.  */
 234     while (src_base < src_end && charbuf < charbuf_end)
 235       *charbuf++ = *src_base++;
 236   /* Remember how many bytes and characters we consumed.  If the
 237      source is multibyte, the bytes and chars are not identical.  */
 238   coding->consumed = coding->consumed_char = src_base - coding->source;
 239   /* Remember how many characters we produced.  */
 240   coding->charbuf_used = charbuf - coding->charbuf;
 241 }
 242 #endif
 243
 244 /*** GENERAL NOTES on `encode_coding_XXX ()' functions ***
 245
 246   These functions encode SRC_BYTES length text at SOURCE of Emacs'
 247   internal multibyte format by CODING.  The resulting byte sequence
 248   goes to a place pointed to by DESTINATION, the length of which
 249   should not exceed DST_BYTES.
 250
 251   These functions set the information of original and encoded texts in
 252   the members produced, produced_char, consumed, and consumed_char of
 253   the structure *CODING.  They also set the member result to one of
 254   CODING_RESULT_XXX indicating how the encoding finished.
 255
 256   DST_BYTES zero means that the source area and destination area
 257   overlap, which means that we can produce encoded text only until it
 258   reaches the head of the not-yet-encoded source text.
 259
 260   Below is a template of these functions.  */
 261 #if 0
 262 static void
 263 encode_coding_XXX (coding)
 264      struct coding_system *coding;
 265 {
 266   int multibytep = coding->dst_multibyte;
 267   int *charbuf = coding->charbuf;
 268   int *charbuf_end = coding->charbuf + coding->charbuf_used;
 269   unsigned char *dst = coding->destination + coding->produced;
 270   unsigned char *dst_end = coding->destination + coding->dst_bytes;
 271   unsigned char *adjusted_dst_end = dst_end - _MAX_BYTES_PRODUCED_IN_LOOP_;
 272   int produced_chars = 0;
 273
 274   for (; charbuf < charbuf_end && dst < adjusted_dst_end; charbuf++)
 275     {
 276       int c = *charbuf;
 277       /* Encode C into DST, and increment DST and PRODUCED_CHARS.  */
 278     }
 279  label_no_more_destination:
 280   /* How many chars and bytes we produced.  */
 281   coding->produced_char += produced_chars;
 282   coding->produced = dst - coding->destination;
 283 }
 284 #endif
 285
 286 \f
 287 /*** 1. Preamble ***/
 288
 289 #include <config.h>
 290 #include <stdio.h>
 291
 292 #include "lisp.h"
 293 #include "buffer.h"
 294 #include "character.h"
 295 #include "charset.h"
 296 #include "ccl.h"
 297 #include "composite.h"
 298 #include "coding.h"
 299 #include "window.h"
 300
 301 Lisp_Object Vcoding_system_hash_table;
 302
 303 Lisp_Object Qcoding_system, Qcoding_aliases, Qeol_type;
 304 Lisp_Object Qunix, Qdos;
 305 extern Lisp_Object Qmac;        /* frame.c */
 306 Lisp_Object Qbuffer_file_coding_system;
 307 Lisp_Object Qpost_read_conversion, Qpre_write_conversion;
 308 Lisp_Object Qdefault_char;
 309 Lisp_Object Qno_conversion, Qundecided;
 310 Lisp_Object Qcharset, Qiso_2022, Qutf_8, Qutf_16, Qshift_jis, Qbig5;
 311 Lisp_Object Qbig, Qlittle;
 312 Lisp_Object Qcoding_system_history;
 313 Lisp_Object Qvalid_codes;
 314 Lisp_Object QCcategory, QCmnemonic, QCdefalut_char;
 315 Lisp_Object QCdecode_translation_table, QCencode_translation_table;
 316 Lisp_Object QCpost_read_conversion, QCpre_write_conversion;
 317
 318 extern Lisp_Object Qinsert_file_contents, Qwrite_region;
 319 Lisp_Object Qcall_process, Qcall_process_region, Qprocess_argument;
 320 Lisp_Object Qstart_process, Qopen_network_stream;
 321 Lisp_Object Qtarget_idx;
 322
 323 Lisp_Object Qinsufficient_source, Qinconsistent_eol, Qinvalid_source;
 324 Lisp_Object Qinterrupted, Qinsufficient_memory;
 325
 326 int coding_system_require_warning;
 327
 328 Lisp_Object Vselect_safe_coding_system_function;
 329
 330 /* Mnemonic string for each format of end-of-line.  */
 331 Lisp_Object eol_mnemonic_unix, eol_mnemonic_dos, eol_mnemonic_mac;
 332 /* Mnemonic string to indicate format of end-of-line is not yet
 333    decided.  */
 334 Lisp_Object eol_mnemonic_undecided;
 335
 336 #ifdef emacs
 337
 338 Lisp_Object Vcoding_system_list, Vcoding_system_alist;
 339
 340 Lisp_Object Qcoding_system_p, Qcoding_system_error;
 341
 342 /* Coding system emacs-mule and raw-text are for converting only
 343    end-of-line format.  */
 344 Lisp_Object Qemacs_mule, Qraw_text;
 345 Lisp_Object Qutf_8_emacs;
 346
 347 /* Coding-systems are handed between Emacs Lisp programs and C internal
 348    routines by the following three variables.  */
 349 /* Coding-system for reading files and receiving data from process.  */
 350 Lisp_Object Vcoding_system_for_read;
 351 /* Coding-system for writing files and sending data to process.  */
 352 Lisp_Object Vcoding_system_for_write;
 353 /* Coding-system actually used in the latest I/O.  */
 354 Lisp_Object Vlast_coding_system_used;
 355 /* Set to non-nil when an error is detected while code conversion.  */
 356 Lisp_Object Vlast_code_conversion_error;
 357 /* A vector of length 256 which contains information about special
 358    Latin codes (especially for dealing with Microsoft codes).  */
 359 Lisp_Object Vlatin_extra_code_table;
 360
 361 /* Flag to inhibit code conversion of end-of-line format.  */
 362 int inhibit_eol_conversion;
 363
 364 /* Flag to inhibit ISO2022 escape sequence detection.  */
 365 int inhibit_iso_escape_detection;
 366
 367 /* Flag to make buffer-file-coding-system inherit from process-coding.  */
 368 int inherit_process_coding_system;
 369
 370 /* Coding system to be used to encode text for terminal display.  */
 371 struct coding_system terminal_coding;
 372
 373 /* Coding system to be used to encode text for terminal display when
 374    terminal coding system is nil.  */
 375 struct coding_system safe_terminal_coding;
 376
 377 /* Coding system of what is sent from terminal keyboard.  */
 378 struct coding_system keyboard_coding;
 379
 380 Lisp_Object Vfile_coding_system_alist;
 381 Lisp_Object Vprocess_coding_system_alist;
 382 Lisp_Object Vnetwork_coding_system_alist;
 383
 384 Lisp_Object Vlocale_coding_system;
 385
 386 #endif /* emacs */
 387
 388 /* Flag to tell if we look up translation table on character code
 389    conversion.  */
 390 Lisp_Object Venable_character_translation;
 391 /* Standard translation table to look up on decoding (reading).  */
 392 Lisp_Object Vstandard_translation_table_for_decode;
 393 /* Standard translation table to look up on encoding (writing).  */
 394 Lisp_Object Vstandard_translation_table_for_encode;
 395
 396 Lisp_Object Qtranslation_table;
 397 Lisp_Object Qtranslation_table_id;
 398 Lisp_Object Qtranslation_table_for_decode;
 399 Lisp_Object Qtranslation_table_for_encode;
 400
 401 /* Alist of charsets vs revision number.  */
 402 static Lisp_Object Vcharset_revision_table;
 403
 404 /* Default coding systems used for process I/O.  */
 405 Lisp_Object Vdefault_process_coding_system;
 406
 407 /* Char table for translating Quail and self-inserting input.  */
 408 Lisp_Object Vtranslation_table_for_input;
 409
 410 /* Two special coding systems.  */
 411 Lisp_Object Vsjis_coding_system;
 412 Lisp_Object Vbig5_coding_system;
 413
 414 /* ISO2022 section */
 415
 416 #define CODING_ISO_INITIAL(coding, reg)                 \
 417   (XINT (AREF (AREF (CODING_ID_ATTRS ((coding)->id),    \
 418                      coding_attr_iso_initial),          \
 419                reg)))
 420
 421 /* CODING's safe_charsets[CHARSET_ID], or -1 if beyond max_charset_id.  */
 422 #define CODING_ISO_REQUEST(coding, charset_id)  \
 423   (((charset_id) <= (coding)->max_charset_id    \
 424     ? (coding)->safe_charsets[charset_id]       \
 425     : -1))
 426
 427
 428 #define CODING_ISO_FLAGS(coding)        \
 429   ((coding)->spec.iso_2022.flags)
 430 #define CODING_ISO_DESIGNATION(coding, reg)     \
 431   ((coding)->spec.iso_2022.current_designation[reg])
 432 #define CODING_ISO_INVOCATION(coding, plane)    \
 433   ((coding)->spec.iso_2022.current_invocation[plane])
 434 #define CODING_ISO_SINGLE_SHIFTING(coding)      \
 435   ((coding)->spec.iso_2022.single_shifting)
 436 #define CODING_ISO_BOL(coding)  \
 437   ((coding)->spec.iso_2022.bol)
 438 #define CODING_ISO_INVOKED_CHARSET(coding, plane)       \
 439   CODING_ISO_DESIGNATION ((coding), CODING_ISO_INVOCATION ((coding), (plane)))
 440
 441 /* Control characters of ISO2022.  */
 442                         /* code */      /* function */
 443 #define ISO_CODE_LF     0x0A            /* line-feed */
 444 #define ISO_CODE_CR     0x0D            /* carriage-return */
 445 #define ISO_CODE_SO     0x0E            /* shift-out */
 446 #define ISO_CODE_SI     0x0F            /* shift-in */
 447 #define ISO_CODE_SS2_7  0x19            /* single-shift-2 for 7-bit code */
 448 #define ISO_CODE_ESC    0x1B            /* escape */
 449 #define ISO_CODE_SS2    0x8E            /* single-shift-2 */
 450 #define ISO_CODE_SS3    0x8F            /* single-shift-3 */
 451 #define ISO_CODE_CSI    0x9B            /* control-sequence-introducer */
 452
 453 /* Every (1-byte) code of ISO2022 is classified into one of the
 454    following classes.  */
 455 enum iso_code_class_type
 456   {
 457     ISO_control_0,              /* Control codes in the range
 458                                    0x00..0x1F and 0x7F, except for the
 459                                    following 4 codes.  */
 460     ISO_shift_out,              /* ISO_CODE_SO (0x0E) */
 461     ISO_shift_in,               /* ISO_CODE_SI (0x0F) */
 462     ISO_single_shift_2_7,       /* ISO_CODE_SS2_7 (0x19) */
 463     ISO_escape,                 /* ISO_CODE_ESC (0x1B) */
 464     ISO_control_1,              /* Control codes in the range
 465                                    0x80..0x9F, except for the
 466                                    following 3 codes.  */
 467     ISO_single_shift_2,         /* ISO_CODE_SS2 (0x8E) */
 468     ISO_single_shift_3,         /* ISO_CODE_SS3 (0x8F) */
 469     ISO_control_sequence_introducer, /* ISO_CODE_CSI (0x9B) */
 470     ISO_0x20_or_0x7F,           /* Codes of the values 0x20 or 0x7F.  */
 471     ISO_graphic_plane_0,        /* Graphic codes in the range 0x21..0x7E.  */
 472     ISO_0xA0_or_0xFF,           /* Codes of the values 0xA0 or 0xFF.  */
 473     ISO_graphic_plane_1         /* Graphic codes in the range 0xA1..0xFE.  */
 474   };
 475
 476 /** Each macro CODING_ISO_FLAG_XXX defines a flag bit of the
 477     `iso-flags' attribute of an iso2022 coding system.  */
 478
 479 /* If set, produce long-form designation sequence (e.g. ESC $ ( A)
 480    instead of the correct short-form sequence (e.g. ESC $ A).  */
 481 #define CODING_ISO_FLAG_LONG_FORM       0x0001
 482
 483 /* If set, reset graphic planes and registers at end-of-line to the
 484    initial state.  */
 485 #define CODING_ISO_FLAG_RESET_AT_EOL    0x0002
 486
 487 /* If set, reset graphic planes and registers before any control
 488    characters to the initial state.  */
 489 #define CODING_ISO_FLAG_RESET_AT_CNTL   0x0004
 490
 491 /* If set, encode by 7-bit environment.  */
 492 #define CODING_ISO_FLAG_SEVEN_BITS      0x0008
 493
 494 /* If set, use locking-shift function.  */
 495 #define CODING_ISO_FLAG_LOCKING_SHIFT   0x0010
 496
 497 /* If set, use single-shift function.  Overwrite
 498    CODING_ISO_FLAG_LOCKING_SHIFT.  */
 499 #define CODING_ISO_FLAG_SINGLE_SHIFT    0x0020
 500
 501 /* If set, use designation escape sequence.  */
 502 #define CODING_ISO_FLAG_DESIGNATION     0x0040
 503
 504 /* If set, produce revision number sequence.  */
 505 #define CODING_ISO_FLAG_REVISION        0x0080
 506
 507 /* If set, produce ISO6429's direction specifying sequence.  */
 508 #define CODING_ISO_FLAG_DIRECTION       0x0100
 509
 510 /* If set, assume designation states are reset at beginning of line on
 511    output.  */
 512 #define CODING_ISO_FLAG_INIT_AT_BOL     0x0200
 513
 514 /* If set, designation sequence should be placed at beginning of line
 515    on output.  */
 516 #define CODING_ISO_FLAG_DESIGNATE_AT_BOL 0x0400
 517
 518 /* If set, do not encode unsafe characters on output.  */
 519 #define CODING_ISO_FLAG_SAFE            0x0800
 520
 521 /* If set, extra latin codes (128..159) are accepted as a valid code
 522    on input.  */
 523 #define CODING_ISO_FLAG_LATIN_EXTRA     0x1000
 524
 525 #define CODING_ISO_FLAG_COMPOSITION     0x2000
 526
 527 #define CODING_ISO_FLAG_EUC_TW_SHIFT    0x4000
 528
 529 #define CODING_ISO_FLAG_USE_ROMAN       0x8000
 530
 531 #define CODING_ISO_FLAG_USE_OLDJIS      0x10000
 532
 533 #define CODING_ISO_FLAG_FULL_SUPPORT    0x100000
 534
 535 /* A character to be produced on output if encoding of the original
 536    character is prohibited by CODING_ISO_FLAG_SAFE.  */
 537 #define CODING_INHIBIT_CHARACTER_SUBSTITUTION  '?'
 538
 539
 540 /* UTF-16 section */
 541 #define CODING_UTF_16_BOM(coding)       \
 542   ((coding)->spec.utf_16.bom)
 543
 544 #define CODING_UTF_16_ENDIAN(coding)    \
 545   ((coding)->spec.utf_16.endian)
 546
 547 #define CODING_UTF_16_SURROGATE(coding) \
 548   ((coding)->spec.utf_16.surrogate)
 549
 550
 551 /* CCL section */
 552 #define CODING_CCL_DECODER(coding)      \
 553   AREF (CODING_ID_ATTRS ((coding)->id), coding_attr_ccl_decoder)
 554 #define CODING_CCL_ENCODER(coding)      \
 555   AREF (CODING_ID_ATTRS ((coding)->id), coding_attr_ccl_encoder)
 556 #define CODING_CCL_VALIDS(coding)                                          \
 557   (SDATA (AREF (CODING_ID_ATTRS ((coding)->id), coding_attr_ccl_valids)))
 558
 559 /* Index for each coding category in `coding_categories' */
 560
 561 enum coding_category
 562   {
 563     coding_category_iso_7,
 564     coding_category_iso_7_tight,
 565     coding_category_iso_8_1,
 566     coding_category_iso_8_2,
 567     coding_category_iso_7_else,
 568     coding_category_iso_8_else,
 569     coding_category_utf_8,
 570     coding_category_utf_16_auto,
 571     coding_category_utf_16_be,
 572     coding_category_utf_16_le,
 573     coding_category_utf_16_be_nosig,
 574     coding_category_utf_16_le_nosig,
 575     coding_category_charset,
 576     coding_category_sjis,
 577     coding_category_big5,
 578     coding_category_ccl,
 579     coding_category_emacs_mule,
 580     /* All above are targets of code detection.  */
 581     coding_category_raw_text,
 582     coding_category_undecided,
 583     coding_category_max
 584   };
 585
 586 /* Definitions of flag bits used in detect_coding_XXXX.  */
 587 #define CATEGORY_MASK_ISO_7             (1 << coding_category_iso_7)
 588 #define CATEGORY_MASK_ISO_7_TIGHT       (1 << coding_category_iso_7_tight)
 589 #define CATEGORY_MASK_ISO_8_1           (1 << coding_category_iso_8_1)
 590 #define CATEGORY_MASK_ISO_8_2           (1 << coding_category_iso_8_2)
 591 #define CATEGORY_MASK_ISO_7_ELSE        (1 << coding_category_iso_7_else)
 592 #define CATEGORY_MASK_ISO_8_ELSE        (1 << coding_category_iso_8_else)
 593 #define CATEGORY_MASK_UTF_8             (1 << coding_category_utf_8)
 594 #define CATEGORY_MASK_UTF_16_AUTO       (1 << coding_category_utf_16_auto)
 595 #define CATEGORY_MASK_UTF_16_BE         (1 << coding_category_utf_16_be)
 596 #define CATEGORY_MASK_UTF_16_LE         (1 << coding_category_utf_16_le)
 597 #define CATEGORY_MASK_UTF_16_BE_NOSIG   (1 << coding_category_utf_16_be_nosig)
 598 #define CATEGORY_MASK_UTF_16_LE_NOSIG   (1 << coding_category_utf_16_le_nosig)
 599 #define CATEGORY_MASK_CHARSET           (1 << coding_category_charset)
 600 #define CATEGORY_MASK_SJIS              (1 << coding_category_sjis)
 601 #define CATEGORY_MASK_BIG5              (1 << coding_category_big5)
 602 #define CATEGORY_MASK_CCL               (1 << coding_category_ccl)
 603 #define CATEGORY_MASK_EMACS_MULE        (1 << coding_category_emacs_mule)
 604 #define CATEGORY_MASK_RAW_TEXT          (1 << coding_category_raw_text)
 605
 606 /* Returned if detect_coding_mask () finds nothing but ASCII characters.
 607    NOTE(review): omits CATEGORY_MASK_UTF_16_AUTO -- confirm intended.  */
 608 #define CATEGORY_MASK_ANY               \
 609   (CATEGORY_MASK_ISO_7                  \
 610    | CATEGORY_MASK_ISO_7_TIGHT          \
 611    | CATEGORY_MASK_ISO_8_1              \
 612    | CATEGORY_MASK_ISO_8_2              \
 613    | CATEGORY_MASK_ISO_7_ELSE           \
 614    | CATEGORY_MASK_ISO_8_ELSE           \
 615    | CATEGORY_MASK_UTF_8                \
 616    | CATEGORY_MASK_UTF_16_BE            \
 617    | CATEGORY_MASK_UTF_16_LE            \
 618    | CATEGORY_MASK_UTF_16_BE_NOSIG      \
 619    | CATEGORY_MASK_UTF_16_LE_NOSIG      \
 620    | CATEGORY_MASK_CHARSET              \
 621    | CATEGORY_MASK_SJIS                 \
 622    | CATEGORY_MASK_BIG5                 \
 623    | CATEGORY_MASK_CCL                  \
 624    | CATEGORY_MASK_EMACS_MULE)
 625
 626
 627 #define CATEGORY_MASK_ISO_7BIT \
 628   (CATEGORY_MASK_ISO_7 | CATEGORY_MASK_ISO_7_TIGHT)
 629
 630 #define CATEGORY_MASK_ISO_8BIT \
 631   (CATEGORY_MASK_ISO_8_1 | CATEGORY_MASK_ISO_8_2)
 632
 633 #define CATEGORY_MASK_ISO_ELSE \
 634   (CATEGORY_MASK_ISO_7_ELSE | CATEGORY_MASK_ISO_8_ELSE)
 635
 636 #define CATEGORY_MASK_ISO_ESCAPE        \
 637   (CATEGORY_MASK_ISO_7                  \
 638    | CATEGORY_MASK_ISO_7_TIGHT          \
 639    | CATEGORY_MASK_ISO_7_ELSE           \
 640    | CATEGORY_MASK_ISO_8_ELSE)
 641
 642 #define CATEGORY_MASK_ISO       \
 643   (  CATEGORY_MASK_ISO_7BIT     \
 644      | CATEGORY_MASK_ISO_8BIT   \
 645      | CATEGORY_MASK_ISO_ELSE)
 646
 647 #define CATEGORY_MASK_UTF_16            \
 648   (CATEGORY_MASK_UTF_16_BE              \
 649    | CATEGORY_MASK_UTF_16_LE            \
 650    | CATEGORY_MASK_UTF_16_BE_NOSIG      \
 651    | CATEGORY_MASK_UTF_16_LE_NOSIG)
 652
 653
 654 /* List of symbols `coding-category-xxx' ordered by priority.  This
 655    variable is exposed to Emacs Lisp.  */
 656 static Lisp_Object Vcoding_category_list;
 657
 658 /* Table of coding categories (Lisp symbols).  This variable is for
 659    internal use only.  */
 660 static Lisp_Object Vcoding_category_table;
 661
 662 /* Table of coding-categories ordered by priority.  */
 663 static enum coding_category coding_priorities[coding_category_max];
 664
 665 /* Nth element is a coding context for the coding system bound to the
 666    Nth coding category.  */
 667 static struct coding_system coding_categories[coding_category_max];
 668
 669 /*** Commonly used macros and functions ***/
 670
 671 #ifndef min
 672 #define min(a, b) ((a) < (b) ? (a) : (b))
 673 #endif
 674 #ifndef max
 675 #define max(a, b) ((a) > (b) ? (a) : (b))
 676 #endif
 677
 678 #define CODING_GET_INFO(coding, attrs, charset_list)    \
 679   do {                                                  \
 680     (attrs) = CODING_ID_ATTRS ((coding)->id);           \
 681     (charset_list) = CODING_ATTR_CHARSET_LIST (attrs);  \
 682   } while (0)
 683
 684 /* Safely get one byte from the source text pointed to by SRC, which
 685    ends at SRC_END, and set C to that byte.  If the source is used up,
 686    jump to `no_more_source', first recording
 687    CODING_RESULT_INSUFFICIENT_SRC if SRC_BASE < SRC.  If multibytep is
 688    nonzero and the byte has bit 0x80 set, either combine a 0xC0/0xC1
 689    lead byte with the next byte, or set C to the negated character code
 690    and record CODING_RESULT_INVALID_SRC.  The caller must declare and set:
 691         coding, src, src_base, src_end, multibytep, consumed_chars */
 692
 693 #define ONE_MORE_BYTE(c)                                \
 694   do {                                                  \
 695     if (src == src_end)                                 \
 696       {                                                 \
 697         if (src_base < src)                             \
 698           record_conversion_result                      \
 699             (coding, CODING_RESULT_INSUFFICIENT_SRC);   \
 700         goto no_more_source;                            \
 701       }                                                 \
 702     c = *src++;                                         \
 703     if (multibytep && (c & 0x80))                       \
 704       {                                                 \
 705         if ((c & 0xFE) == 0xC0)                         \
 706           c = ((c & 1) << 6) | *src++;                  \
 707         else                                            \
 708           {                                             \
 709             c = - string_char (--src, &src, NULL);      \
 710             record_conversion_result                    \
 711               (coding, CODING_RESULT_INVALID_SRC);      \
 712           }                                             \
 713       }                                                 \
 714     consumed_chars++;                                   \
 715   } while (0)
 716
 717 /* Like ONE_MORE_BYTE, but never test SRC against SRC_END.  */
 718 #define ONE_MORE_BYTE_NO_CHECK(c)                       \
 719   do {                                                  \
 720     c = *src++;                                         \
 721     if (multibytep && (c & 0x80))                       \
 722       {                                                 \
 723         if ((c & 0xFE) == 0xC0)                         \
 724           c = ((c & 1) << 6) | *src++;                  \
 725         else                                            \
 726           {                                             \
 727             c = - string_char (--src, &src, NULL);      \
 728             record_conversion_result                    \
 729               (coding, CODING_RESULT_INVALID_SRC);      \
 730           }                                             \
 731       }                                                 \
 732     consumed_chars++;                                   \
 733   } while (0)
 734
 735
 736 /* Store a byte C in the place pointed by DST and increment DST to the
 737    next free point, and increment PRODUCED_CHARS.  The caller should
 738    assure that C is 0..127, and declare and set the variable `dst'
 739    appropriately in advance.
 740 */
 741
 742
 743 #define EMIT_ONE_ASCII_BYTE(c)  \
 744   do {                          \
 745     produced_chars++;           \
 746     *dst++ = (c);               \
 747   } while (0)
 748
 749
 750 /* Like EMIT_ONE_ASCII_BYTE but store two bytes; C1 and C2.  */
 751
 752 #define EMIT_TWO_ASCII_BYTES(c1, c2)    \
 753   do {                                  \
 754     produced_chars += 2;                \
 755     *dst++ = (c1), *dst++ = (c2);       \
 756   } while (0)
 757
 758
 759 /* Store a byte C in the place pointed by DST and increment DST to the
 760    next free point, and increment PRODUCED_CHARS.  If MULTIBYTEP is
 761    nonzero, store in an appropriate multibyte form.  The caller should
 762    declare and set the variables `dst' and `multibytep' appropriately
 763    in advance.  */
 764
 765 #define EMIT_ONE_BYTE(c)                \
 766   do {                                  \
 767     produced_chars++;                   \
 768     if (multibytep)                     \
 769       {                                 \
 770         int ch = (c);                   \
 771         if (ch >= 0x80)                 \
 772           ch = BYTE8_TO_CHAR (ch);      \
 773         CHAR_STRING_ADVANCE (ch, dst);  \
 774       }                                 \
 775     else                                \
 776       *dst++ = (c);                     \
 777   } while (0)
 778
 779
 780 /* Like EMIT_ONE_BYTE, but emit two bytes; C1 and C2.  */
 781
 782 #define EMIT_TWO_BYTES(c1, c2)          \
 783   do {                                  \
 784     produced_chars += 2;                \
 785     if (multibytep)                     \
 786       {                                 \
 787         int ch;                         \
 788                                         \
 789         ch = (c1);                      \
 790         if (ch >= 0x80)                 \
 791           ch = BYTE8_TO_CHAR (ch);      \
 792         CHAR_STRING_ADVANCE (ch, dst);  \
 793         ch = (c2);                      \
 794         if (ch >= 0x80)                 \
 795           ch = BYTE8_TO_CHAR (ch);      \
 796         CHAR_STRING_ADVANCE (ch, dst);  \
 797       }                                 \
 798     else                                \
 799       {                                 \
 800         *dst++ = (c1);                  \
 801         *dst++ = (c2);                  \
 802       }                                 \
 803   } while (0)
 804
 805
 806 #define EMIT_THREE_BYTES(c1, c2, c3)    \
 807   do {                                  \
 808     EMIT_ONE_BYTE (c1);                 \
 809     EMIT_TWO_BYTES (c2, c3);            \
 810   } while (0)
 811
 812
 813 #define EMIT_FOUR_BYTES(c1, c2, c3, c4)         \
 814   do {                                          \
 815     EMIT_TWO_BYTES (c1, c2);                    \
 816     EMIT_TWO_BYTES (c3, c4);                    \
 817   } while (0)
 818
 819
/* Prototypes for static functions.  They are grouped below by coding
   system family (a detector, a decoder and an encoder each), followed
   by the shared source/destination helpers and the generic
   conversion machinery.  */
static void record_conversion_result P_ ((struct coding_system *coding,
                                          enum coding_result_code result));

/* UTF-8.  */
static int detect_coding_utf_8 P_ ((struct coding_system *,
                                    struct coding_detection_info *info));
static void decode_coding_utf_8 P_ ((struct coding_system *));
static int encode_coding_utf_8 P_ ((struct coding_system *));

/* UTF-16.  */
static int detect_coding_utf_16 P_ ((struct coding_system *,
                                     struct coding_detection_info *info));
static void decode_coding_utf_16 P_ ((struct coding_system *));
static int encode_coding_utf_16 P_ ((struct coding_system *));

/* ISO 2022.  */
static int detect_coding_iso_2022 P_ ((struct coding_system *,
                                       struct coding_detection_info *info));
static void decode_coding_iso_2022 P_ ((struct coding_system *));
static int encode_coding_iso_2022 P_ ((struct coding_system *));

/* emacs-mule.  */
static int detect_coding_emacs_mule P_ ((struct coding_system *,
                                         struct coding_detection_info *info));
static void decode_coding_emacs_mule P_ ((struct coding_system *));
static int encode_coding_emacs_mule P_ ((struct coding_system *));

/* Shift-JIS.  */
static int detect_coding_sjis P_ ((struct coding_system *,
                                   struct coding_detection_info *info));
static void decode_coding_sjis P_ ((struct coding_system *));
static int encode_coding_sjis P_ ((struct coding_system *));

/* BIG5.  */
static int detect_coding_big5 P_ ((struct coding_system *,
                                   struct coding_detection_info *info));
static void decode_coding_big5 P_ ((struct coding_system *));
static int encode_coding_big5 P_ ((struct coding_system *));

/* CCL.  */
static int detect_coding_ccl P_ ((struct coding_system *,
                                  struct coding_detection_info *info));
static void decode_coding_ccl P_ ((struct coding_system *));
static int encode_coding_ccl P_ ((struct coding_system *));

/* Raw text (no detector).  */
static void decode_coding_raw_text P_ ((struct coding_system *));
static int encode_coding_raw_text P_ ((struct coding_system *));

/* Source and destination bookkeeping.  */
static void coding_set_source P_ ((struct coding_system *));
static void coding_set_destination P_ ((struct coding_system *));
static void coding_alloc_by_realloc P_ ((struct coding_system *, EMACS_INT));
static void coding_alloc_by_making_gap P_ ((struct coding_system *,
                                            EMACS_INT));
static unsigned char *alloc_destination P_ ((struct coding_system *,
                                             EMACS_INT, unsigned char *));
/* Remaining helpers and conversion drivers.  */
static void setup_iso_safe_charsets P_ ((Lisp_Object));
static unsigned char *encode_designation_at_bol P_ ((struct coding_system *,
                                                     int *, int *,
                                                     unsigned char *));
static int detect_eol P_ ((const unsigned char *,
                           EMACS_INT, enum coding_category));
static Lisp_Object adjust_coding_eol_type P_ ((struct coding_system *, int));
static void decode_eol P_ ((struct coding_system *));
static Lisp_Object get_translation_table P_ ((Lisp_Object, int, int *));
static Lisp_Object get_translation P_ ((Lisp_Object, int *, int *,
                                        int, int *, int *));
static int produce_chars P_ ((struct coding_system *, Lisp_Object, int));
static INLINE void produce_composition P_ ((struct coding_system *, int *,
                                            EMACS_INT));
static INLINE void produce_charset P_ ((struct coding_system *, int *,
                                        EMACS_INT));
static void produce_annotation P_ ((struct coding_system *, EMACS_INT));
static int decode_coding P_ ((struct coding_system *));
static INLINE int *handle_composition_annotation P_ ((EMACS_INT, EMACS_INT,
                                                      struct coding_system *,
                                                      int *, EMACS_INT *));
static INLINE int *handle_charset_annotation P_ ((EMACS_INT, EMACS_INT,
                                                  struct coding_system *,
                                                  int *, EMACS_INT *));
static void consume_chars P_ ((struct coding_system *, Lisp_Object, int));
static int encode_coding P_ ((struct coding_system *));
static Lisp_Object make_conversion_work_buffer P_ ((int));
static Lisp_Object code_conversion_restore P_ ((Lisp_Object));
static INLINE int char_encodable_p P_ ((int, Lisp_Object));
static Lisp_Object make_subsidiaries P_ ((Lisp_Object));
 898
 899 static void
 900 record_conversion_result (struct coding_system *coding,
 901                           enum coding_result_code result)
 902 {
 903   coding->result = result;
 904   switch (result)
 905     {
 906     case CODING_RESULT_INSUFFICIENT_SRC:
 907       Vlast_code_conversion_error = Qinsufficient_source;
 908       break;
 909     case CODING_RESULT_INCONSISTENT_EOL:
 910       Vlast_code_conversion_error = Qinconsistent_eol;
 911       break;
 912     case CODING_RESULT_INVALID_SRC:
 913       Vlast_code_conversion_error = Qinvalid_source;
 914       break;
 915     case CODING_RESULT_INTERRUPT:
 916       Vlast_code_conversion_error = Qinterrupted;
 917       break;
 918     case CODING_RESULT_INSUFFICIENT_MEM:
 919       Vlast_code_conversion_error = Qinsufficient_memory;
 920       break;
 921     }
 922 }
 923
 924 #define CODING_DECODE_CHAR(coding, src, src_base, src_end, charset, code, c) \
 925   do {                                                                       \
 926     charset_map_loaded = 0;                                                  \
 927     c = DECODE_CHAR (charset, code);                                         \
 928     if (charset_map_loaded)                                                  \
 929       {                                                                      \
 930         const unsigned char *orig = coding->source;                          \
 931         EMACS_INT offset;                                                    \
 932                                                                              \
 933         coding_set_source (coding);                                          \
 934         offset = coding->source - orig;                                      \
 935         src += offset;                                                       \
 936         src_base += offset;                                                  \
 937         src_end += offset;                                                   \
 938       }                                                                      \
 939   } while (0)
 940
 941
 942 #define ASSURE_DESTINATION(bytes)                               \
 943   do {                                                          \
 944     if (dst + (bytes) >= dst_end)                               \
 945       {                                                         \
 946         int more_bytes = charbuf_end - charbuf + (bytes);       \
 947                                                                 \
 948         dst = alloc_destination (coding, more_bytes, dst);      \
 949         dst_end = coding->destination + coding->dst_bytes;      \
 950       }                                                         \
 951   } while (0)
 952
 953
 954
 955 static void
 956 coding_set_source (coding)
 957      struct coding_system *coding;
 958 {
 959   if (BUFFERP (coding->src_object))
 960     {
 961       struct buffer *buf = XBUFFER (coding->src_object);
 962
 963       if (coding->src_pos < 0)
 964         coding->source = BUF_GAP_END_ADDR (buf) + coding->src_pos_byte;
 965       else
 966         coding->source = BUF_BYTE_ADDRESS (buf, coding->src_pos_byte);
 967     }
 968   else if (STRINGP (coding->src_object))
 969     {
 970       coding->source = SDATA (coding->src_object) + coding->src_pos_byte;
 971     }
 972   else
 973     /* Otherwise, the source is C string and is never relocated
 974        automatically.  Thus we don't have to update anything.  */
 975     ;
 976 }
 977
 978 static void
 979 coding_set_destination (coding)
 980      struct coding_system *coding;
 981 {
 982   if (BUFFERP (coding->dst_object))
 983     {
 984       if (coding->src_pos < 0)
 985         {
 986           coding->destination = BEG_ADDR + coding->dst_pos_byte - 1;
 987           coding->dst_bytes = (GAP_END_ADDR
 988                                - (coding->src_bytes - coding->consumed)
 989                                - coding->destination);
 990         }
 991       else
 992         {
 993           /* We are sure that coding->dst_pos_byte is before the gap
 994              of the buffer. */
 995           coding->destination = (BUF_BEG_ADDR (XBUFFER (coding->dst_object))
 996                                  + coding->dst_pos_byte - 1);
 997           coding->dst_bytes = (BUF_GAP_END_ADDR (XBUFFER (coding->dst_object))
 998                                - coding->destination);
 999         }
1000     }
1001   else
1002     /* Otherwise, the destination is C string and is never relocated
1003        automatically.  Thus we don't have to update anything.  */
1004     ;
1005 }
1006
1007
1008 static void
1009 coding_alloc_by_realloc (coding, bytes)
1010      struct coding_system *coding;
1011      EMACS_INT bytes;
1012 {
1013   coding->destination = (unsigned char *) xrealloc (coding->destination,
1014                                                     coding->dst_bytes + bytes);
1015   coding->dst_bytes += bytes;
1016 }
1017
1018 static void
1019 coding_alloc_by_making_gap (coding, bytes)
1020      struct coding_system *coding;
1021      EMACS_INT bytes;
1022 {
1023   if (BUFFERP (coding->dst_object)
1024       && EQ (coding->src_object, coding->dst_object))
1025     {
1026       EMACS_INT add = coding->src_bytes - coding->consumed;
1027
1028       GAP_SIZE -= add; ZV += add; Z += add; ZV_BYTE += add; Z_BYTE += add;
1029       make_gap (bytes);
1030       GAP_SIZE += add; ZV -= add; Z -= add; ZV_BYTE -= add; Z_BYTE -= add;
1031     }
1032   else
1033     {
1034       Lisp_Object this_buffer;
1035
1036       this_buffer = Fcurrent_buffer ();
1037       set_buffer_internal (XBUFFER (coding->dst_object));
1038       make_gap (bytes);
1039       set_buffer_internal (XBUFFER (this_buffer));
1040     }
1041 }
1042
1043
1044 static unsigned char *
1045 alloc_destination (coding, nbytes, dst)
1046      struct coding_system *coding;
1047      EMACS_INT nbytes;
1048      unsigned char *dst;
1049 {
1050   EMACS_INT offset = dst - coding->destination;
1051
1052   if (BUFFERP (coding->dst_object))
1053     coding_alloc_by_making_gap (coding, nbytes);
1054   else
1055     coding_alloc_by_realloc (coding, nbytes);
1056   record_conversion_result (coding, CODING_RESULT_SUCCESS);
1057   coding_set_destination (coding);
1058   dst = coding->destination + offset;
1059   return dst;
1060 }
1061
1062 /** Macros for annotations.  */
1063
/* Maximum length of annotation data (sum of annotations for
   composition and charset).  Reading the terms left to right:
   presumably 4 elements for the composition annotation up to and
   including METHOD, at most (MAX_COMPOSITION_COMPONENTS * 2) - 1
   component elements, and 4 elements for the charset annotation --
   TODO confirm against the code that fills in the components.  */
#define MAX_ANNOTATION_LENGTH (4 + (MAX_COMPOSITION_COMPONENTS * 2) - 1 + 4)
1067
1068 /* An annotation data is stored in the array coding->charbuf in this
1069    format:
1070      [ -LENGTH ANNOTATION_MASK NCHARS ... ]
1071    LENGTH is the number of elements in the annotation.
1072    ANNOTATION_MASK is one of CODING_ANNOTATE_XXX_MASK.
1073    NCHARS is the number of characters in the text annotated.
1074
   The format of the following elements depends on ANNOTATION_MASK.
1076
   In the case of CODING_ANNOTATE_COMPOSITION_MASK, these elements
   follow:
     ... METHOD [ COMPOSITION-COMPONENTS ... ]
   METHOD is one of enum composition_method.
   Optional COMPOSITION-COMPONENTS are characters and composition
   rules.
1083
1084    In the case of CODING_ANNOTATE_CHARSET_MASK, one element CHARSET-ID
1085    follows.  */
1086
/* Store the three leading elements of an annotation at BUF, which is
   advanced past them: -LEN, MASK and NCHARS.  Also set
   `coding->annotated'; the caller must have a variable `coding' in
   scope.

   The expansion deliberately does not end with a semicolon, so that
   `ADD_ANNOTATION_DATA (...);' is exactly one statement and can be
   used as the body of an unbraced `if' that has an `else'.  */
#define ADD_ANNOTATION_DATA(buf, len, mask, nchars)     \
  do {                                                  \
    *(buf)++ = -(len);                                  \
    *(buf)++ = (mask);                                  \
    *(buf)++ = (nchars);                                \
    coding->annotated = 1;                              \
  } while (0)
1094
/* Store the head of a composition annotation at BUF, which is
   advanced past it: the three elements written by
   ADD_ANNOTATION_DATA (with length 4 and
   CODING_ANNOTATE_COMPOSITION_MASK) followed by METHOD.  The
   arguments are parenthesized in the same way as in
   ADD_ANNOTATION_DATA.  */
#define ADD_COMPOSITION_DATA(buf, nchars, method)                           \
  do {                                                                      \
    ADD_ANNOTATION_DATA (buf, 4, CODING_ANNOTATE_COMPOSITION_MASK, nchars); \
    *(buf)++ = (method);                                                    \
  } while (0)
1100
1101
/* Store a charset annotation at BUF, which is advanced past it: the
   three elements written by ADD_ANNOTATION_DATA (with length 4 and
   CODING_ANNOTATE_CHARSET_MASK) followed by the charset ID.  The
   arguments are parenthesized in the same way as in
   ADD_ANNOTATION_DATA.  */
#define ADD_CHARSET_DATA(buf, nchars, id)                               \
  do {                                                                  \
    ADD_ANNOTATION_DATA (buf, 4, CODING_ANNOTATE_CHARSET_MASK, nchars); \
    *(buf)++ = (id);                                                    \
  } while (0)
1107
1108 \f
1109 /*** 2. Emacs' internal format (emacs-utf-8) ***/
1110
1111
1112
1113 \f
1114 /*** 3. UTF-8 ***/
1115
1116 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
1117    Check if a text is encoded in UTF-8.  If it is, return 1, else
1118    return 0.  */
1119
/* Predicates classifying one octet of a UTF-8 sequence by its high
   bits.  */

/* Below 0x80: a complete one-octet sequence.  */
#define UTF_8_1_OCTET_P(octet)          ((octet) < 0x80)
/* 10xxxxxx: a trailing octet.  */
#define UTF_8_EXTRA_OCTET_P(octet)      (((octet) & 0xC0) == 0x80)
/* 110xxxxx: leads a two-octet sequence.  */
#define UTF_8_2_OCTET_LEADING_P(octet)  (((octet) & 0xE0) == 0xC0)
/* 1110xxxx: leads a three-octet sequence.  */
#define UTF_8_3_OCTET_LEADING_P(octet)  (((octet) & 0xF0) == 0xE0)
/* 11110xxx: leads a four-octet sequence.  */
#define UTF_8_4_OCTET_LEADING_P(octet)  (((octet) & 0xF8) == 0xF0)
/* 111110xx: leads a five-octet sequence.  */
#define UTF_8_5_OCTET_LEADING_P(octet)  (((octet) & 0xFC) == 0xF8)
1126
/* Check whether the source text of CODING looks like UTF-8, and
   record the verdict in DETECT_INFO.

   CATEGORY_MASK_UTF_8 is always added to DETECT_INFO->checked.
   Return 0, with CATEGORY_MASK_UTF_8 added to DETECT_INFO->rejected,
   when a malformed sequence is seen, or when the last block ends in
   the middle of a sequence.  Otherwise return 1; DETECT_INFO->found
   then gets CATEGORY_MASK_UTF_8 only if at least one multi-octet
   sequence was seen.

   NOTE(review): ONE_MORE_BYTE is defined earlier in this file and
   reads the locals `src', `src_end', `multibytep' and
   `consumed_chars'; it is expected to jump to `no_more_source' when
   the source is exhausted -- confirm against its definition.  */
static int
detect_coding_utf_8 (coding, detect_info)
     struct coding_system *coding;
     struct coding_detection_info *detect_info;
{
  const unsigned char *src = coding->source, *src_base;
  const unsigned char *src_end = coding->source + coding->src_bytes;
  int multibytep = coding->src_multibyte;
  int consumed_chars = 0;
  /* Becomes CATEGORY_MASK_UTF_8 once a multi-octet sequence is seen.  */
  int found = 0;

  detect_info->checked |= CATEGORY_MASK_UTF_8;
  /* A coding system of this category is always ASCII compatible.  */
  src += coding->head_ascii;

  while (1)
    {
      int c, c1, c2, c3, c4;

      /* Remember where this sequence starts, so that the code at
         `no_more_source' can tell whether the source ended in the
         middle of it.  */
      src_base = src;
      ONE_MORE_BYTE (c);
      if (c < 0 || UTF_8_1_OCTET_P (c))
        continue;
      /* Every further octet must be a trailing octet; after each one,
         accept the sequence if the leading octet announced that
         length.  */
      ONE_MORE_BYTE (c1);
      if (c1 < 0 || ! UTF_8_EXTRA_OCTET_P (c1))
        break;
      if (UTF_8_2_OCTET_LEADING_P (c))
        {
          found = CATEGORY_MASK_UTF_8;
          continue;
        }
      ONE_MORE_BYTE (c2);
      if (c2 < 0 || ! UTF_8_EXTRA_OCTET_P (c2))
        break;
      if (UTF_8_3_OCTET_LEADING_P (c))
        {
          found = CATEGORY_MASK_UTF_8;
          continue;
        }
      ONE_MORE_BYTE (c3);
      if (c3 < 0 || ! UTF_8_EXTRA_OCTET_P (c3))
        break;
      if (UTF_8_4_OCTET_LEADING_P (c))
        {
          found = CATEGORY_MASK_UTF_8;
          continue;
        }
      ONE_MORE_BYTE (c4);
      if (c4 < 0 || ! UTF_8_EXTRA_OCTET_P (c4))
        break;
      if (UTF_8_5_OCTET_LEADING_P (c))
        {
          found = CATEGORY_MASK_UTF_8;
          continue;
        }
      /* Four trailing octets, but no valid leading octet.  */
      break;
    }
  /* Reached only through `break': a malformed sequence was seen.  */
  detect_info->rejected |= CATEGORY_MASK_UTF_8;
  return 0;

 no_more_source:
  /* A sequence cut short at the end of the last block is an error.  */
  if (src_base < src && coding->mode & CODING_MODE_LAST_BLOCK)
    {
      detect_info->rejected |= CATEGORY_MASK_UTF_8;
      return 0;
    }
  detect_info->found |= found;
  return 1;
}
1196
1197
/* Decode the UTF-8 bytes of CODING->source, starting at offset
   CODING->consumed, into character codes appended to CODING->charbuf.

   Sequences of up to five octets are accepted.  Overlong forms,
   surrogate code points and values above MAX_CHAR are treated as
   invalid.  For an invalid sequence, only its first byte is stored
   (through BYTE8_TO_CHAR when it is not ASCII), CODING->errors is
   incremented, and decoding resumes at the following byte.

   On return, CODING->consumed_char, CODING->consumed and
   CODING->charbuf_used are updated; a sequence that was only partly
   read is not counted as consumed.

   NOTE(review): ONE_MORE_BYTE is defined earlier in this file; it
   uses the locals declared here by name and is expected to jump to
   `no_more_source' when the source is exhausted -- confirm against
   its definition.  */
static void
decode_coding_utf_8 (coding)
     struct coding_system *coding;
{
  const unsigned char *src = coding->source + coding->consumed;
  const unsigned char *src_end = coding->source + coding->src_bytes;
  const unsigned char *src_base;
  int *charbuf = coding->charbuf + coding->charbuf_used;
  int *charbuf_end = coding->charbuf + coding->charbuf_size;
  int consumed_chars = 0, consumed_chars_base;
  int multibytep = coding->src_multibyte;
  Lisp_Object attr, charset_list;

  CODING_GET_INFO (coding, attr, charset_list);

  while (1)
    {
      int c, c1, c2, c3, c4, c5;

      /* Remember the state at the start of this sequence; it is
         restored for an invalid sequence and reported as the amount
         consumed on exit.  */
      src_base = src;
      consumed_chars_base = consumed_chars;

      if (charbuf >= charbuf_end)
        break;

      ONE_MORE_BYTE (c1);
      if (c1 < 0)
        {
          /* A negative value is stored with its sign flipped.  */
          c = - c1;
        }
      else if (UTF_8_1_OCTET_P(c1))
        {
          c = c1;
        }
      else
        {
          ONE_MORE_BYTE (c2);
          if (c2 < 0 || ! UTF_8_EXTRA_OCTET_P (c2))
            goto invalid_code;
          if (UTF_8_2_OCTET_LEADING_P (c1))
            {
              c = ((c1 & 0x1F) << 6) | (c2 & 0x3F);
              /* Reject overlong sequences here and below.  Encoders
                 producing them are incorrect, they can be misleading,
                 and they mess up read/write invariance.  */
              if (c < 128)
                goto invalid_code;
            }
          else
            {
              ONE_MORE_BYTE (c3);
              if (c3 < 0 || ! UTF_8_EXTRA_OCTET_P (c3))
                goto invalid_code;
              if (UTF_8_3_OCTET_LEADING_P (c1))
                {
                  c = (((c1 & 0xF) << 12)
                       | ((c2 & 0x3F) << 6) | (c3 & 0x3F));
                  if (c < 0x800
                      || (c >= 0xd800 && c < 0xe000)) /* surrogates (invalid) */
                    goto invalid_code;
                }
              else
                {
                  ONE_MORE_BYTE (c4);
                  if (c4 < 0 || ! UTF_8_EXTRA_OCTET_P (c4))
                    goto invalid_code;
                  if (UTF_8_4_OCTET_LEADING_P (c1))
                    {
                    c = (((c1 & 0x7) << 18) | ((c2 & 0x3F) << 12)
                         | ((c3 & 0x3F) << 6) | (c4 & 0x3F));
                    /* Overlong four-octet form.  */
                    if (c < 0x10000)
                      goto invalid_code;
                    }
                  else
                    {
                      ONE_MORE_BYTE (c5);
                      if (c5 < 0 || ! UTF_8_EXTRA_OCTET_P (c5))
                        goto invalid_code;
                      if (UTF_8_5_OCTET_LEADING_P (c1))
                        {
                          c = (((c1 & 0x3) << 24) | ((c2 & 0x3F) << 18)
                               | ((c3 & 0x3F) << 12) | ((c4 & 0x3F) << 6)
                               | (c5 & 0x3F));
                          /* Out of range, or overlong five-octet form.  */
                          if ((c > MAX_CHAR) || (c < 0x200000))
                            goto invalid_code;
                        }
                      else
                        goto invalid_code;
                    }
                }
            }
        }

      *charbuf++ = c;
      continue;

    invalid_code:
      /* Rewind to the start of the sequence and emit its first byte
         on its own; the remaining bytes are decoded again by the next
         iterations.  */
      src = src_base;
      consumed_chars = consumed_chars_base;
      ONE_MORE_BYTE (c);
      *charbuf++ = ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
      coding->errors++;
    }

 no_more_source:
  /* Report progress up to the start of the last sequence begun.  */
  coding->consumed_char += consumed_chars_base;
  coding->consumed = src_base - coding->source;
  coding->charbuf_used = charbuf - coding->charbuf;
}
1307
1308
/* Encode the characters in CODING->charbuf and append the bytes to
   CODING->destination, starting at offset CODING->produced.

   If the destination is multibyte, every byte of the encoded
   sequence is emitted through EMIT_ONE_BYTE, and a character for
   which CHAR_BYTE8_P holds is emitted as the single byte given by
   CHAR_TO_BYTE8.  Otherwise each character is written directly with
   CHAR_STRING.

   The conversion result is set to CODING_RESULT_SUCCESS, and
   CODING->produced_char and CODING->produced are updated.  Always
   return 0.

   NOTE(review): ASSURE_DESTINATION and EMIT_ONE_BYTE use the locals
   `coding', `dst', `dst_end', `charbuf', `charbuf_end', `multibytep'
   and `produced_chars' by name, so these names must not change.  */
static int
encode_coding_utf_8 (coding)
     struct coding_system *coding;
{
  int multibytep = coding->dst_multibyte;
  int *charbuf = coding->charbuf;
  int *charbuf_end = charbuf + coding->charbuf_used;
  unsigned char *dst = coding->destination + coding->produced;
  unsigned char *dst_end = coding->destination + coding->dst_bytes;
  int produced_chars = 0;
  int c;

  if (multibytep)
    {
      /* Twice the length of one sequence: presumably because each
         byte may take two bytes in multibyte form -- TODO confirm.  */
      int safe_room = MAX_MULTIBYTE_LENGTH * 2;

      while (charbuf < charbuf_end)
        {
          unsigned char str[MAX_MULTIBYTE_LENGTH], *p, *pend = str;

          ASSURE_DESTINATION (safe_room);
          c = *charbuf++;
          if (CHAR_BYTE8_P (c))
            {
              c = CHAR_TO_BYTE8 (c);
              EMIT_ONE_BYTE (c);
            }
          else
            {
              /* Build the sequence in STR, then emit it byte by
                 byte.  */
              CHAR_STRING_ADVANCE (c, pend);
              for (p = str; p < pend; p++)
                EMIT_ONE_BYTE (*p);
            }
        }
    }
  else
    {
      int safe_room = MAX_MULTIBYTE_LENGTH;

      while (charbuf < charbuf_end)
        {
          ASSURE_DESTINATION (safe_room);
          c = *charbuf++;
          dst += CHAR_STRING (c, dst);
          produced_chars++;
        }
    }
  record_conversion_result (coding, CODING_RESULT_SUCCESS);
  coding->produced_char += produced_chars;
  coding->produced = dst - coding->destination;
  return 0;
}
1361
1362
1363 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
1364    Check if a text is encoded in one of UTF-16 based coding systems.
1365    If it is, return 1, else return 0.  */
1366
/* Nonzero if the 16-bit code unit UNIT is in 0xD800..0xDBFF.  */
#define UTF_16_HIGH_SURROGATE_P(unit) (((unit) & 0xFC00) == 0xD800)

/* Nonzero if the 16-bit code unit UNIT is in 0xDC00..0xDFFF.  */
#define UTF_16_LOW_SURROGATE_P(unit) (((unit) & 0xFC00) == 0xDC00)

/* Nonzero if UNIT is 0xFFFE, 0xFFFF, or a low surrogate.  */
#define UTF_16_INVALID_P(unit)                                  \
  ((unit) == 0xFFFE || (unit) == 0xFFFF || UTF_16_LOW_SURROGATE_P (unit))
1377
1378
1379 static int
1380 detect_coding_utf_16 (coding, detect_info)
1381      struct coding_system *coding;
1382      struct coding_detection_info *detect_info;
1383 {
1384   const unsigned char *src = coding->source, *src_base = src;
1385   const unsigned char *src_end = coding->source + coding->src_bytes;
1386   int multibytep = coding->src_multibyte;
1387   int consumed_chars = 0;
1388   int c1, c2;
1389
1390   detect_info->checked |= CATEGORY_MASK_UTF_16;
1391   if (coding->mode & CODING_MODE_LAST_BLOCK
1392       && (coding->src_chars & 1))
1393     {
1394       detect_info->rejected |= CATEGORY_MASK_UTF_16;
1395       return 0;
1396     }
1397
1398   ONE_MORE_BYTE (c1);
1399   ONE_MORE_BYTE (c2);
1400   if ((c1 == 0xFF) && (c2 == 0xFE))
1401     {
1402       detect_info->found |= (CATEGORY_MASK_UTF_16_LE
1403                              | CATEGORY_MASK_UTF_16_AUTO);
1404       detect_info->rejected |= (CATEGORY_MASK_UTF_16_BE
1405                                 | CATEGORY_MASK_UTF_16_BE_NOSIG
1406                                 | CATEGORY_MASK_UTF_16_LE_NOSIG);
1407     }
1408   else if ((c1 == 0xFE) && (c2 == 0xFF))
1409     {
1410       detect_info->found |= (CATEGORY_MASK_UTF_16_BE
1411                              | CATEGORY_MASK_UTF_16_AUTO);
1412       detect_info->rejected |= (CATEGORY_MASK_UTF_16_LE
1413                                 | CATEGORY_MASK_UTF_16_BE_NOSIG
1414                                 | CATEGORY_MASK_UTF_16_LE_NOSIG);
1415     }
1416   else if (c1 >= 0 && c2 >= 0)
1417     {
1418       unsigned char b1[256], b2[256];
1419       int b1_variants = 1, b2_variants = 1;
1420       int n;
1421
1422       bzero (b1, 256), bzero (b2, 256);
1423       b1[c1]++, b2[c2]++;
1424       for (n = 0; n < 256 && src < src_end; n++)
1425         {
1426           src_base = src;
1427           ONE_MORE_BYTE (c1);
1428           ONE_MORE_BYTE (c2);
1429           if (c1 < 0 || c2 < 0)
1430             break;
1431           if (! b1[c1++]) b1_variants++;
1432           if (! b2[c2++]) b2_variants++;
1433         }
1434       if (b1_variants < b2_variants)
1435         detect_info->found |= CATEGORY_MASK_UTF_16_BE_NOSIG;
1436       else
1437         detect_info->found |= CATEGORY_MASK_UTF_16_LE_NOSIG;
1438       detect_info->rejected
1439         |= (CATEGORY_MASK_UTF_16_BE | CATEGORY_MASK_UTF_16_LE);
1440     }
1441  no_more_source:
1442   return 1;
1443 }
1444
/* Decode the UTF-16 bytes of CODING->source, starting at offset
   CODING->consumed, into character codes appended to CODING->charbuf.

   The byte order comes from CODING_UTF_16_ENDIAN.  If a BOM is
   announced, the first two bytes are compared with the BOM of that
   byte order; when they do not match they are decoded as an ordinary
   character and CODING->errors is incremented.  In both BOM modes
   the BOM setting of CODING is then switched to utf_16_without_bom.

   A high surrogate is kept in CODING_UTF_16_SURROGATE until the next
   code unit is read, so a pair may span two calls.

   On return, CODING->consumed_char, CODING->consumed and
   CODING->charbuf_used are updated.

   NOTE(review): ONE_MORE_BYTE is defined earlier in this file; it
   uses the locals declared here by name and is expected to jump to
   `no_more_source' when the source is exhausted -- confirm against
   its definition.  */
static void
decode_coding_utf_16 (coding)
     struct coding_system *coding;
{
  const unsigned char *src = coding->source + coding->consumed;
  const unsigned char *src_end = coding->source + coding->src_bytes;
  const unsigned char *src_base;
  int *charbuf = coding->charbuf + coding->charbuf_used;
  int *charbuf_end = coding->charbuf + coding->charbuf_size;
  int consumed_chars = 0, consumed_chars_base;
  int multibytep = coding->src_multibyte;
  enum utf_16_bom_type bom = CODING_UTF_16_BOM (coding);
  enum utf_16_endian_type endian = CODING_UTF_16_ENDIAN (coding);
  /* Pending high surrogate, or 0 if there is none.  */
  int surrogate = CODING_UTF_16_SURROGATE (coding);
  Lisp_Object attr, charset_list;

  CODING_GET_INFO (coding, attr, charset_list);

  if (bom == utf_16_with_bom)
    {
      int c, c1, c2;

      src_base = src;
      ONE_MORE_BYTE (c1);
      ONE_MORE_BYTE (c2);
      /* The two bytes are always combined first-byte-high here, so a
         little-endian BOM reads as 0xFFFE.  */
      c = (c1 << 8) | c2;

      if (endian == utf_16_big_endian
          ? c != 0xFEFF : c != 0xFFFE)
        {
          /* The first two bytes are not BOM.  Treat them as bytes
             for a normal character.  */
          src = src_base;
          coding->errors++;
        }
      CODING_UTF_16_BOM (coding) = utf_16_without_bom;
    }
  else if (bom == utf_16_detect_bom)
    {
      /* We have already tried to detect BOM and failed in
         detect_coding.  */
      CODING_UTF_16_BOM (coding) = utf_16_without_bom;
    }

  while (1)
    {
      int c, c1, c2;

      src_base = src;
      consumed_chars_base = consumed_chars;

      /* One iteration stores at most three elements (see the
         surrogate error case below), so stop while that many are
         still free.  */
      if (charbuf + 2 >= charbuf_end)
        break;

      ONE_MORE_BYTE (c1);
      if (c1 < 0)
        {
          /* A negative value is stored with its sign flipped.  */
          *charbuf++ = -c1;
          continue;
        }
      ONE_MORE_BYTE (c2);
      if (c2 < 0)
        {
          /* The pair cannot be completed: store the first byte by
             itself (through BYTE8_TO_CHAR when it is not ASCII),
             then the sign-flipped second value.  */
          *charbuf++ = ASCII_BYTE_P (c1) ? c1 : BYTE8_TO_CHAR (c1);
          *charbuf++ = -c2;
          continue;
        }
      c = (endian == utf_16_big_endian
           ? ((c1 << 8) | c2) : ((c2 << 8) | c1));
      if (surrogate)
        {
          if (! UTF_16_LOW_SURROGATE_P (c))
            {
              /* The pending high surrogate is not followed by a low
                 one.  Store its two bytes, in source order, as two
                 separate elements and count an error.  */
              if (endian == utf_16_big_endian)
                c1 = surrogate >> 8, c2 = surrogate & 0xFF;
              else
                c1 = surrogate & 0xFF, c2 = surrogate >> 8;
              *charbuf++ = c1;
              *charbuf++ = c2;
              coding->errors++;
              /* The unit just read is either a new pending high
                 surrogate or an ordinary character.  */
              if (UTF_16_HIGH_SURROGATE_P (c))
                CODING_UTF_16_SURROGATE (coding) = surrogate = c;
              else
                *charbuf++ = c;
            }
          else
            {
              /* Combine the pair into one code point at or above
                 0x10000.  */
              c = ((surrogate - 0xD800) << 10) | (c - 0xDC00);
              CODING_UTF_16_SURROGATE (coding) = surrogate = 0;
              *charbuf++ = 0x10000 + c;
            }
        }
      else
        {
          if (UTF_16_HIGH_SURROGATE_P (c))
            CODING_UTF_16_SURROGATE (coding) = surrogate = c;
          else
            *charbuf++ = c;
        }
    }

 no_more_source:
  /* Report progress up to the start of the last code unit begun.  */
  coding->consumed_char += consumed_chars_base;
  coding->consumed = src_base - coding->source;
  coding->charbuf_used = charbuf - coding->charbuf;
}
1551
1552 static int
1553 encode_coding_utf_16 (coding)
1554      struct coding_system *coding;
1555 {
1556   int multibytep = coding->dst_multibyte;
1557   int *charbuf = coding->charbuf;
1558   int *charbuf_end = charbuf + coding->charbuf_used;
1559   unsigned char *dst = coding->destination + coding->produced;
1560   unsigned char *dst_end = coding->destination + coding->dst_bytes;
1561   int safe_room = 8;
1562   enum utf_16_bom_type bom = CODING_UTF_16_BOM (coding);
1563   int big_endian = CODING_UTF_16_ENDIAN (coding) == utf_16_big_endian;
1564   int produced_chars = 0;
1565   Lisp_Object attrs, charset_list;
1566   int c;
1567
1568   CODING_GET_INFO (coding, attrs, charset_list);
1569
1570   if (bom != utf_16_without_bom)
1571     {
1572       ASSURE_DESTINATION (safe_room);
1573       if (big_endian)
1574         EMIT_TWO_BYTES (0xFE, 0xFF);
1575       else
1576         EMIT_TWO_BYTES (0xFF, 0xFE);
1577       CODING_UTF_16_BOM (coding) = utf_16_without_bom;
1578     }
1579
1580   while (charbuf < charbuf_end)
1581     {
1582       ASSURE_DESTINATION (safe_room);
1583       c = *charbuf++;
1584       if (c >= MAX_UNICODE_CHAR)
1585         c = coding->default_char;
1586
1587       if (c < 0x10000)
1588         {
1589           if (big_endian)
1590             EMIT_TWO_BYTES (c >> 8, c & 0xFF);
1591           else
1592             EMIT_TWO_BYTES (c & 0xFF, c >> 8);
1593         }
1594       else
1595         {
1596           int c1, c2;
1597
1598           c -= 0x10000;
1599           c1 = (c >> 10) + 0xD800;
1600           c2 = (c & 0x3FF) + 0xDC00;
1601           if (big_endian)
1602             EMIT_FOUR_BYTES (c1 >> 8, c1 & 0xFF, c2 >> 8, c2 & 0xFF);
1603           else
1604             EMIT_FOUR_BYTES (c1 & 0xFF, c1 >> 8, c2 & 0xFF, c2 >> 8);
1605         }
1606     }
1607   record_conversion_result (coding, CODING_RESULT_SUCCESS);
1608   coding->produced = dst - coding->destination;
1609   coding->produced_char += produced_chars;
1610   return 0;
1611 }
1612
1613 \f
1614 /*** 6. Old Emacs' internal format (emacs-mule) ***/
1615
1616 /* Emacs' internal format for representation of multiple character
1617    sets is a kind of multi-byte encoding, i.e. characters are
1618    represented by variable-length sequences of one-byte codes.
1619
1620    ASCII characters and control characters (e.g. `tab', `newline') are
1621    represented by one-byte sequences which are their ASCII codes, in
1622    the range 0x00 through 0x7F.
1623
1624    8-bit characters of the range 0x80..0x9F are represented by
1625    two-byte sequences of LEADING_CODE_8_BIT_CONTROL and (their 8-bit
1626    code + 0x20).
1627
1628    8-bit characters of the range 0xA0..0xFF are represented by
1629    one-byte sequences which are their 8-bit code.
1630
1631    The other characters are represented by a sequence of `base
1632    leading-code', optional `extended leading-code', and one or two
1633    `position-code's.  The length of the sequence is determined by the
1634    base leading-code.  Leading-code takes the range 0x81 through 0x9D,
1635    whereas extended leading-code and position-code take the range 0xA0
1636    through 0xFF.  See `charset.h' for more details about leading-code
1637    and position-code.
1638
1639    --- CODE RANGE of Emacs' internal format ---
1640    character set        range
1641    -------------        -----
1642    ascii                0x00..0x7F
1643    eight-bit-control    LEADING_CODE_8_BIT_CONTROL + 0xA0..0xBF
1644    eight-bit-graphic    0xA0..0xFF
1645    ELSE                 0x81..0x9D + [0xA0..0xFF]+
1646    ---------------------------------------------
1647
1648    As this is the internal character representation, the format is
1649    usually not used externally (i.e. in a file or in a data sent to a
1650    process).  But, it is possible to have a text externally in this
1651    format (i.e. by encoding by the coding system `emacs-mule').
1652
1653    In that case, a sequence of one-byte codes has a slightly different
1654    form.
1655
1656    At first, all characters in eight-bit-control are represented by
1657    one-byte sequences which are their 8-bit code.
1658
1659    Next, character composition data are represented by the byte
1660    sequence of the form: 0x80 METHOD BYTES CHARS COMPONENT ...,
1661    where,
1662         METHOD is 0xF2 plus one of the composition methods (enum
1663         composition_method),
1664
1665         BYTES is 0xA0 plus the byte length of this composition data,
1666
1667         CHARS is 0xA0 plus the number of characters composed by this
1668         data,
1669
1670         COMPONENTs are characters in multibyte form or composition
1671         rules encoded as two bytes of ASCII codes.
1672
1673    In addition, for backward compatibility, the following formats are
1674    also recognized as composition data on decoding.
1675
1676    0x80 MSEQ ...
1677    0x80 0xFF MSEQ RULE MSEQ RULE ... MSEQ
1678
1679    Here,
1680         MSEQ is a multibyte form, but in one of these special formats:
1681           ASCII: 0xA0 ASCII_CODE+0x80,
1682           other: LEADING_CODE+0x20 FOLLOWING-BYTE ...,
1683         RULE is a one byte code of the range 0xA0..0xF0 that
1684         represents a composition rule.
1685   */
1686
1687 char emacs_mule_bytes[256];
1688
1689 int
1690 emacs_mule_char (coding, src, nbytes, nchars, id)
1691      struct coding_system *coding;
1692      const unsigned char *src;
1693      int *nbytes, *nchars, *id;
1694 {
1695   const unsigned char *src_end = coding->source + coding->src_bytes;
1696   const unsigned char *src_base = src;
1697   int multibytep = coding->src_multibyte;
1698   struct charset *charset;
1699   unsigned code;
1700   int c;
1701   int consumed_chars = 0;
1702
1703   ONE_MORE_BYTE (c);
1704   if (c < 0)
1705     {
1706       c = -c;
1707       charset = emacs_mule_charset[0];
1708     }
1709   else
1710     {
1711       switch (emacs_mule_bytes[c])
1712         {
1713         case 2:
1714           if (! (charset = emacs_mule_charset[c]))
1715             goto invalid_code;
1716           ONE_MORE_BYTE (c);
1717           if (c < 0xA0)
1718             goto invalid_code;
1719           code = c & 0x7F;
1720           break;
1721
1722         case 3:
1723           if (c == EMACS_MULE_LEADING_CODE_PRIVATE_11
1724               || c == EMACS_MULE_LEADING_CODE_PRIVATE_12)
1725             {
1726               ONE_MORE_BYTE (c);
1727               if (c < 0xA0 || ! (charset = emacs_mule_charset[c]))
1728                 goto invalid_code;
1729               ONE_MORE_BYTE (c);
1730               if (c < 0xA0)
1731                 goto invalid_code;
1732               code = c & 0x7F;
1733             }
1734           else
1735             {
1736               if (! (charset = emacs_mule_charset[c]))
1737                 goto invalid_code;
1738               ONE_MORE_BYTE (c);
1739               if (c < 0xA0)
1740                 goto invalid_code;
1741               code = (c & 0x7F) << 8;
1742               ONE_MORE_BYTE (c);
1743               if (c < 0xA0)
1744                 goto invalid_code;
1745               code |= c & 0x7F;
1746             }
1747           break;
1748
1749         case 4:
1750           ONE_MORE_BYTE (c);
1751           if (c < 0 || ! (charset = emacs_mule_charset[c]))
1752             goto invalid_code;
1753           ONE_MORE_BYTE (c);
1754           if (c < 0xA0)
1755             goto invalid_code;
1756           code = (c & 0x7F) << 8;
1757           ONE_MORE_BYTE (c);
1758           if (c < 0xA0)
1759             goto invalid_code;
1760           code |= c & 0x7F;
1761           break;
1762
1763         case 1:
1764           code = c;
1765           charset = CHARSET_FROM_ID (ASCII_BYTE_P (code)
1766                                      ? charset_ascii : charset_eight_bit);
1767           break;
1768
1769         default:
1770           abort ();
1771         }
1772       c = DECODE_CHAR (charset, code);
1773       if (c < 0)
1774         goto invalid_code;
1775     }
1776   *nbytes = src - src_base;
1777   *nchars = consumed_chars;
1778   if (id)
1779     *id = charset->id;
1780   return c;
1781
1782  no_more_source:
1783   return -2;
1784
1785  invalid_code:
1786   return -1;
1787 }
1788
1789
1790 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
1791    Check if a text is encoded in `emacs-mule'.  If it is, return 1,
1792    else return 0.  */
1793
1794 static int
1795 detect_coding_emacs_mule (coding, detect_info)
1796      struct coding_system *coding;
1797      struct coding_detection_info *detect_info;
1798 {
1799   const unsigned char *src = coding->source, *src_base;
1800   const unsigned char *src_end = coding->source + coding->src_bytes;
1801   int multibytep = coding->src_multibyte;
1802   int consumed_chars = 0;
1803   int c;
1804   int found = 0;
1805
1806   detect_info->checked |= CATEGORY_MASK_EMACS_MULE;
1807   /* A coding system of this category is always ASCII compatible.  */
1808   src += coding->head_ascii;
1809
1810   while (1)
1811     {
1812       src_base = src;
1813       ONE_MORE_BYTE (c);
1814       if (c < 0)
1815         continue;
1816       if (c == 0x80)
1817         {
1818           /* Perhaps the start of composite character.  We simple skip
1819              it because analyzing it is too heavy for detecting.  But,
1820              at least, we check that the composite character
1821              constitues of more than 4 bytes.  */
1822           const unsigned char *src_base;
1823
1824         repeat:
1825           src_base = src;
1826           do
1827             {
1828               ONE_MORE_BYTE (c);
1829             }
1830           while (c >= 0xA0);
1831
1832           if (src - src_base <= 4)
1833             break;
1834           found = CATEGORY_MASK_EMACS_MULE;
1835           if (c == 0x80)
1836             goto repeat;
1837         }
1838
1839       if (c < 0x80)
1840         {
1841           if (c < 0x20
1842               && (c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO))
1843             break;
1844         }
1845       else
1846         {
1847           int more_bytes = emacs_mule_bytes[*src_base] - 1;
1848
1849           while (more_bytes > 0)
1850             {
1851               ONE_MORE_BYTE (c);
1852               if (c < 0xA0)
1853                 {
1854                   src--;        /* Unread the last byte.  */
1855                   break;
1856                 }
1857               more_bytes--;
1858             }
1859           if (more_bytes != 0)
1860             break;
1861           found = CATEGORY_MASK_EMACS_MULE;
1862         }
1863     }
1864   detect_info->rejected |= CATEGORY_MASK_EMACS_MULE;
1865   return 0;
1866
1867  no_more_source:
1868   if (src_base < src && coding->mode & CODING_MODE_LAST_BLOCK)
1869     {
1870       detect_info->rejected |= CATEGORY_MASK_EMACS_MULE;
1871       return 0;
1872     }
1873   detect_info->found |= found;
1874   return 1;
1875 }
1876
1877
1878 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".  */
1879
/* Decode one character that is a component of a composition sequence
   at SRC by calling emacs_mule_char.  On success store the character
   in *BUF, increment BUF, advance SRC past the character and add the
   consumed count to CONSUMED_CHARS.

   If SRC has reached SRC_END, or emacs_mule_char reports that the
   source is exhausted (-2), execute `break'.  Any other failure jumps
   to the label `invalid_code'.

   The body is wrapped in `if (1) ... else' rather than `do ... while
   (0)' so that the `break' leaves the loop enclosing the macro call
   and not a loop supplied by the macro itself.  */

#define DECODE_EMACS_MULE_COMPOSITION_CHAR(buf)                         \
  if (1)                                                                \
    {                                                                   \
      int component;                                                    \
      int component_bytes, component_chars;                             \
                                                                        \
      if (src == src_end)                                               \
        break;                                                          \
      component = emacs_mule_char (coding, src, &component_bytes,       \
                                   &component_chars, NULL);             \
      if (component == -2)                                              \
        break;                                                          \
      if (component < 0)                                                \
        goto invalid_code;                                              \
      *buf++ = component;                                               \
      consumed_chars += component_chars;                                \
      src += component_bytes;                                           \
    }                                                                   \
  else
1907
1908
/* Decode a composition rule represented as a component of composition
   sequence of Emacs 20 style at SRC.  Such a rule is a single byte in
   the range 0xA0..0xF0 whose value is 0xA0 + GREF * 9 + NREF.  Store
   the decoded rule in *BUF, and increment BUF.  If SRC is exhausted
   or does not point to a byte in that range, jump to the label
   `invalid_code'.  */

#define DECODE_EMACS_MULE_COMPOSITION_RULE_20(buf)      \
  do {                                                  \
    int c, gref, nref;                                  \
                                                        \
    if (src >= src_end)                                 \
      goto invalid_code;                                \
    ONE_MORE_BYTE_NO_CHECK (c);                         \
    /* The rule byte is biased by 0xA0, not 0x20 as in  \
       the two-byte Emacs 21 form.  */                  \
    c -= 0xA0;                                          \
    if (c < 0 || c >= 81)                               \
      goto invalid_code;                                \
                                                        \
    gref = c / 9, nref = c % 9;                         \
    *buf++ = COMPOSITION_ENCODE_RULE (gref, nref);      \
  } while (0)
1928
1929
/* Decode a composition rule represented as a component of composition
   sequence of Emacs 21 style at SRC.  Such a rule occupies two bytes,
   each holding a value in the range 0..80 biased by 0x20.  Store the
   decoded rule in *BUF, and increment BUF.  If fewer than two bytes
   remain before SRC_END, or either value is out of range, jump to the
   label `invalid_code'.  */

#define DECODE_EMACS_MULE_COMPOSITION_RULE_21(buf)              \
  do {                                                          \
    int gref_value, nref_value;                                 \
                                                                \
    /* Both bytes must be present before reading any.  */       \
    if (src_end - src < 2)                                      \
      goto invalid_code;                                        \
    ONE_MORE_BYTE_NO_CHECK (gref_value);                        \
    ONE_MORE_BYTE_NO_CHECK (nref_value);                        \
    gref_value -= 0x20;                                         \
    nref_value -= 0x20;                                         \
    if (! (gref_value >= 0 && gref_value < 81                   \
           && nref_value >= 0 && nref_value < 81))              \
      goto invalid_code;                                        \
    *buf++ = COMPOSITION_ENCODE_RULE (gref_value, nref_value);  \
  } while (0)
1950
1951
/* Decode the header of an Emacs 21 style composition.  On entry C is
   the METHOD byte (0xF2 plus a composition method) that followed
   0x80.  The next two bytes are BYTES (0xA0 plus the byte length of
   this composition information) and CHARS (0xA0 plus the number of
   characters composed).

   A composition annotation is added to CHARBUF.  Unless the method is
   COMPOSITION_RELATIVE, the components that follow are decoded and
   appended too, characters and rules alternating (characters only for
   COMPOSITION_WITH_ALTCHARS), and the first element of the annotation
   is decreased by the number of components appended.

   Malformed input jumps to the label `invalid_code'.  */

#define DECODE_EMACS_MULE_21_COMPOSITION(c)                             \
  do {                                                                  \
    int *annotation_head = charbuf;                                     \
    enum composition_method how = c - 0xF2;                             \
    int info_bytes, composed_chars;                                     \
    int info_limit;                                                     \
                                                                        \
    ONE_MORE_BYTE (c);                                                  \
    if (c < 0)                                                          \
      goto invalid_code;                                                \
    info_bytes = c - 0xA0;                                              \
    if (info_bytes < 3)                                                 \
      goto invalid_code;                                                \
    ONE_MORE_BYTE (c);                                                  \
    if (c < 0)                                                          \
      goto invalid_code;                                                \
    composed_chars = c - 0xA0;                                          \
    ADD_COMPOSITION_DATA (charbuf, composed_chars, how);                \
    /* The length is counted from the head of the sequence.  */        \
    info_limit = consumed_chars_base + info_bytes;                      \
    if (how != COMPOSITION_RELATIVE)                                    \
      {                                                                 \
        int n;                                                          \
                                                                        \
        for (n = 0; consumed_chars < info_limit; n++)                   \
          {                                                             \
            if (n % 2 != 0 && how != COMPOSITION_WITH_ALTCHARS)         \
              DECODE_EMACS_MULE_COMPOSITION_RULE_21 (charbuf);          \
            else                                                        \
              DECODE_EMACS_MULE_COMPOSITION_CHAR (charbuf);             \
          }                                                             \
        /* The loop was left by a `break' before the announced         \
           length was reached.  */                                      \
        if (consumed_chars < info_limit)                                \
          goto invalid_code;                                            \
        annotation_head[0] -= n;                                        \
      }                                                                 \
  } while (0)
1991
1992
/* Decode an Emacs 20 style relative composition, `0x80 MSEQ ...'.

   On entry the 0x80 and the byte after it have already been read, so
   both SRC and CONSUMED_CHARS are rewound to the head of the sequence
   and only the 0x80 is read again.  Components are then decoded for
   as long as the next byte is 0xA0 or above (every MSEQ starts with
   such a byte), up to MAX_COMPOSITION_COMPONENTS of them.

   With fewer than two components the sequence is invalid and control
   jumps to `invalid_code'.  Otherwise a composition annotation is
   added to CHARBUF, followed by the decoded characters.  */

#define DECODE_EMACS_MULE_20_RELATIVE_COMPOSITION(c)            \
  do {                                                          \
    /* Emacs 20 style format for relative composition.  */      \
    /* Store multibyte form of characters to be composed.  */   \
    enum composition_method method = COMPOSITION_RELATIVE;      \
    int components[MAX_COMPOSITION_COMPONENTS * 2 - 1];         \
    int *buf = components;                                      \
    int i, j;                                                   \
                                                                \
    src = src_base;                                             \
    /* Rewind the count together with SRC; otherwise the two    \
       bytes read before the macro are counted twice.  */       \
    consumed_chars = consumed_chars_base;                       \
    ONE_MORE_BYTE (c);          /* skip 0x80 */                 \
    /* Stop at the first byte that cannot start a component     \
       instead of swallowing the text after the sequence.  */   \
    for (i = 0;                                                 \
         (i < MAX_COMPOSITION_COMPONENTS                        \
          && src < src_end && *src >= 0xA0);                    \
         i++)                                                   \
      DECODE_EMACS_MULE_COMPOSITION_CHAR (buf);                 \
    if (i < 2)                                                  \
      goto invalid_code;                                        \
    ADD_COMPOSITION_DATA (charbuf, i, method);                  \
    for (j = 0; j < i; j++)                                     \
      *charbuf++ = components[j];                               \
  } while (0)
2012
2013
/* Decode an Emacs 20 style rule-base composition,
   `0x80 0xFF MSEQ RULE MSEQ RULE ... MSEQ'.  On entry the 0x80 and
   the 0xFF have already been read.

   Characters and rules are collected alternately into a local array,
   starting and ending with a character, until the next byte is below
   0xA0, the source ends, or MAX_COMPOSITION_COMPONENTS characters
   have been read.  Fewer than two characters, or a rule without a
   character after it, jumps to `invalid_code'.

   If CHARBUF lacks room for the result, control jumps to
   `no_more_source' so that the sequence is decoded again later.
   Otherwise a composition annotation is added to CHARBUF, followed by
   all collected components (the first element of the annotation is
   decreased by their number, as for the Emacs 21 format) and then by
   the characters alone.  */

#define DECODE_EMACS_MULE_20_RULEBASE_COMPOSITION(c)            \
  do {                                                          \
    /* Emacs 20 style format for rule-base composition.  */     \
    /* Store multibyte form of characters to be composed.  */   \
    enum composition_method method = COMPOSITION_WITH_RULE;     \
    int *charbuf_base = charbuf;                                \
    int components[MAX_COMPOSITION_COMPONENTS * 2 - 1];         \
    int *buf = components;                                      \
    int ncomponents;                                            \
    int i, j;                                                   \
                                                                \
    /* I counts characters, so at most                          \
       MAX_COMPOSITION_COMPONENTS * 2 - 1 elements are stored   \
       and COMPONENTS cannot overflow.  */                      \
    for (i = 0; i < MAX_COMPOSITION_COMPONENTS; i++)            \
      {                                                         \
        if (i > 0)                                              \
          {                                                     \
            /* A byte below 0xA0 is neither a rule nor the      \
               head of an MSEQ: the sequence is over.  */       \
            if (src >= src_end || *src < 0xA0)                  \
              break;                                            \
            DECODE_EMACS_MULE_COMPOSITION_RULE_20 (buf);        \
          }                                                     \
        DECODE_EMACS_MULE_COMPOSITION_CHAR (buf);               \
      }                                                         \
    if (i < 2 || (buf - components) % 2 == 0)                   \
      goto invalid_code;                                        \
    ncomponents = 2 * i - 1;                                    \
    /* Room is needed for the components and for the I          \
       characters; room for the annotation header itself is     \
       presumably what CHARBUF_END already holds back.  */      \
    if (charbuf_end - charbuf < ncomponents + i)                \
      goto no_more_source;                                      \
    ADD_COMPOSITION_DATA (charbuf, i, method);                  \
    for (j = 0; j < ncomponents; j++)                           \
      *charbuf++ = components[j];                               \
    charbuf_base[0] -= ncomponents;                             \
    /* The characters sit at the even positions.  */            \
    for (j = 0; j < ncomponents; j += 2)                        \
      *charbuf++ = components[j];                               \
  } while (0)
2039
2040
2041 static void
2042 decode_coding_emacs_mule (coding)
2043      struct coding_system *coding;
2044 {
2045   const unsigned char *src = coding->source + coding->consumed;
2046   const unsigned char *src_end = coding->source + coding->src_bytes;
2047   const unsigned char *src_base;
2048   int *charbuf = coding->charbuf + coding->charbuf_used;
2049   int *charbuf_end
2050     = coding->charbuf + coding->charbuf_size - MAX_ANNOTATION_LENGTH;
2051   int consumed_chars = 0, consumed_chars_base;
2052   int multibytep = coding->src_multibyte;
2053   Lisp_Object attrs, charset_list;
2054   int char_offset = coding->produced_char;
2055   int last_offset = char_offset;
2056   int last_id = charset_ascii;
2057
2058   CODING_GET_INFO (coding, attrs, charset_list);
2059
2060   while (1)
2061     {
2062       int c;
2063
2064       src_base = src;
2065       consumed_chars_base = consumed_chars;
2066
2067       if (charbuf >= charbuf_end)
2068         break;
2069
2070       ONE_MORE_BYTE (c);
2071       if (c < 0)
2072         {
2073           *charbuf++ = -c;
2074           char_offset++;
2075         }
2076       else if (c < 0x80)
2077         {
2078           *charbuf++ = c;
2079           char_offset++;
2080         }
2081       else if (c == 0x80)
2082         {
2083           ONE_MORE_BYTE (c);
2084           if (c < 0)
2085             goto invalid_code;
2086           if (c - 0xF2 >= COMPOSITION_RELATIVE
2087               && c - 0xF2 <= COMPOSITION_WITH_RULE_ALTCHARS)
2088             DECODE_EMACS_MULE_21_COMPOSITION (c);
2089           else if (c < 0xC0)
2090             DECODE_EMACS_MULE_20_RELATIVE_COMPOSITION (c);
2091           else if (c == 0xFF)
2092             DECODE_EMACS_MULE_20_RULEBASE_COMPOSITION (c);
2093           else
2094             goto invalid_code;
2095         }
2096       else if (c < 0xA0 && emacs_mule_bytes[c] > 1)
2097         {
2098           int nbytes, nchars;
2099           int id;
2100
2101           src = src_base;
2102           consumed_chars = consumed_chars_base;
2103           c = emacs_mule_char (coding, src, &nbytes, &nchars, &id);
2104           if (c < 0)
2105             {
2106               if (c == -2)
2107                 break;
2108               goto invalid_code;
2109             }
2110           if (last_id != id)
2111             {
2112               if (last_id != charset_ascii)
2113                 ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
2114               last_id = id;
2115               last_offset = char_offset;
2116             }
2117           *charbuf++ = c;
2118           src += nbytes;
2119           consumed_chars += nchars;
2120           char_offset++;
2121         }
2122       continue;
2123
2124     invalid_code:
2125       src = src_base;
2126       consumed_chars = consumed_chars_base;
2127       ONE_MORE_BYTE (c);
2128       *charbuf++ = ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
2129       char_offset++;
2130       coding->errors++;
2131     }
2132
2133  no_more_source:
2134   if (last_id != charset_ascii)
2135     ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
2136   coding->consumed_char += consumed_chars_base;
2137   coding->consumed = src_base - coding->source;
2138   coding->charbuf_used = charbuf - coding->charbuf;
2139 }
2140
2141
/* Store in CODES[0] and CODES[1] the leading codes for the emacs-mule
   charset id ID.  An id below 0xA0 is its own, single, leading code
   and CODES[1] is set to 0.  Any other id is stored in CODES[1] and
   preceded by one of the codes 0x9A through 0x9D, chosen by the range
   the id falls in.

   ID is evaluated more than once.  The expansion is a single
   statement without a trailing semicolon, so the call can be used
   anywhere a statement can, including before `else'.  */

#define EMACS_MULE_LEADING_CODES(id, codes)             \
  do {                                                  \
    if ((id) < 0xA0)                                    \
      (codes)[0] = (id), (codes)[1] = 0;                \
    else if ((id) < 0xE0)                               \
      (codes)[0] = 0x9A, (codes)[1] = (id);             \
    else if ((id) < 0xF0)                               \
      (codes)[0] = 0x9B, (codes)[1] = (id);             \
    else if ((id) < 0xF5)                               \
      (codes)[0] = 0x9C, (codes)[1] = (id);             \
    else                                                \
      (codes)[0] = 0x9D, (codes)[1] = (id);             \
  } while (0)
2155
2156
2157 static int
2158 encode_coding_emacs_mule (coding)
2159      struct coding_system *coding;
2160 {
2161   int multibytep = coding->dst_multibyte;
2162   int *charbuf = coding->charbuf;
2163   int *charbuf_end = charbuf + coding->charbuf_used;
2164   unsigned char *dst = coding->destination + coding->produced;
2165   unsigned char *dst_end = coding->destination + coding->dst_bytes;
2166   int safe_room = 8;
2167   int produced_chars = 0;
2168   Lisp_Object attrs, charset_list;
2169   int c;
2170   int preferred_charset_id = -1;
2171
2172   CODING_GET_INFO (coding, attrs, charset_list);
2173   if (! EQ (charset_list, Vemacs_mule_charset_list))
2174     {
2175       CODING_ATTR_CHARSET_LIST (attrs)
2176         = charset_list = Vemacs_mule_charset_list;
2177     }
2178
2179   while (charbuf < charbuf_end)
2180     {
2181       ASSURE_DESTINATION (safe_room);
2182       c = *charbuf++;
2183
2184       if (c < 0)
2185         {
2186           /* Handle an annotation.  */
2187           switch (*charbuf)
2188             {
2189             case CODING_ANNOTATE_COMPOSITION_MASK:
2190               /* Not yet implemented.  */
2191               break;
2192             case CODING_ANNOTATE_CHARSET_MASK:
2193               preferred_charset_id = charbuf[3];
2194               if (preferred_charset_id >= 0
2195                   && NILP (Fmemq (make_number (preferred_charset_id),
2196                                   charset_list)))
2197                 preferred_charset_id = -1;
2198               break;
2199             default:
2200               abort ();
2201             }
2202           charbuf += -c - 1;
2203           continue;
2204         }
2205
2206       if (ASCII_CHAR_P (c))
2207         EMIT_ONE_ASCII_BYTE (c);
2208       else if (CHAR_BYTE8_P (c))
2209         {
2210           c = CHAR_TO_BYTE8 (c);
2211           EMIT_ONE_BYTE (c);
2212         }
2213       else
2214         {
2215           struct charset *charset;
2216           unsigned code;
2217           int dimension;
2218           int emacs_mule_id;
2219           unsigned char leading_codes[2];
2220
2221           if (preferred_charset_id >= 0)
2222             {
2223               charset = CHARSET_FROM_ID (preferred_charset_id);
2224               if (! CHAR_CHARSET_P (c, charset))
2225                 charset = char_charset (c, charset_list, NULL);
2226             }
2227           else
2228             charset = char_charset (c, charset_list, &code);
2229           if (! charset)
2230             {
2231               c = coding->default_char;
2232               if (ASCII_CHAR_P (c))
2233                 {
2234                   EMIT_ONE_ASCII_BYTE (c);
2235                   continue;
2236                 }
2237               charset = char_charset (c, charset_list, &code);
2238             }
2239           dimension = CHARSET_DIMENSION (charset);
2240           emacs_mule_id = CHARSET_EMACS_MULE_ID (charset);
2241           EMACS_MULE_LEADING_CODES (emacs_mule_id, leading_codes);
2242           EMIT_ONE_BYTE (leading_codes[0]);
2243           if (leading_codes[1])
2244             EMIT_ONE_BYTE (leading_codes[1]);
2245           if (dimension == 1)
2246             EMIT_ONE_BYTE (code | 0x80);
2247           else
2248             {
2249               code |= 0x8080;
2250               EMIT_ONE_BYTE (code >> 8);
2251               EMIT_ONE_BYTE (code & 0xFF);
2252             }
2253         }
2254     }
2255   record_conversion_result (coding, CODING_RESULT_SUCCESS);
2256   coding->produced_char += produced_chars;
2257   coding->produced = dst - coding->destination;
2258   return 0;
2259 }
2260
2261 \f
2262 /*** 7. ISO2022 handlers ***/
2263
2264 /* The following note describes the coding system ISO2022 briefly.
2265    Since the intention of this note is to help understand the
2266    functions in this file, some parts are NOT ACCURATE or are OVERLY
2267    SIMPLIFIED.  For thorough understanding, please refer to the
2268    original document of ISO2022.  This is equivalent to the standard
2269    ECMA-35, obtainable from <URL:http://www.ecma.ch/> (*).
2270
2271    ISO2022 provides many mechanisms to encode several character sets
2272    in 7-bit and 8-bit environments.  For 7-bit environments, all text
2273    is encoded using bytes less than 128.  This may make the encoded
2274    text a little bit longer, but the text passes more easily through
2275    several types of gateway, some of which strip off the MSB (Most
2276    Significant Bit).
2277
2278    There are two kinds of character sets: control character sets and
2279    graphic character sets.  The former contain control characters such
2280    as `newline' and `escape' to provide control functions (control
2281    functions are also provided by escape sequences).  The latter
2282    contain graphic characters such as 'A' and '-'.  Emacs recognizes
2283    two control character sets and many graphic character sets.
2284
2285    Graphic character sets are classified into one of the following
2286    four classes, according to the number of bytes (DIMENSION) and
2287    number of characters in one dimension (CHARS) of the set:
2288    - DIMENSION1_CHARS94
2289    - DIMENSION1_CHARS96
2290    - DIMENSION2_CHARS94
2291    - DIMENSION2_CHARS96
2292
2293    In addition, each character set is assigned an identification tag,
2294    unique for each set, called the "final character" (denoted as <F>
2295    hereafter).  The <F> of each character set is decided by ECMA(*)
2296    when it is registered in ISO.  The code range of <F> is 0x30..0x7F
2297    (0x30..0x3F are for private use only).
2298
2299    Note (*): ECMA = European Computer Manufacturers Association
2300
2301    Here are examples of graphic character sets [NAME(<F>)]:
2302         o DIMENSION1_CHARS94 -- ASCII('B'), right-half-of-JISX0201('I'), ...
2303         o DIMENSION1_CHARS96 -- right-half-of-ISO8859-1('A'), ...
2304         o DIMENSION2_CHARS94 -- GB2312('A'), JISX0208('B'), ...
2305         o DIMENSION2_CHARS96 -- none for the moment
2306
2307    A code area (1 byte=8 bits) is divided into 4 areas, C0, GL, C1, and GR.
2308         C0 [0x00..0x1F] -- control character plane 0
2309         GL [0x20..0x7F] -- graphic character plane 0
2310         C1 [0x80..0x9F] -- control character plane 1
2311         GR [0xA0..0xFF] -- graphic character plane 1
2312
2313    A control character set is directly designated and invoked to C0 or
2314    C1 by an escape sequence.  The most common case is that:
2315    - ISO646's  control character set is designated/invoked to C0, and
2316    - ISO6429's control character set is designated/invoked to C1,
2317    and usually these designations/invocations are omitted in encoded
2318    text.  In a 7-bit environment, only C0 can be used, and a control
2319    character for C1 is encoded by an appropriate escape sequence to
2320    fit into the environment.  All control characters for C1 are
2321    defined to have corresponding escape sequences.
2322
2323    A graphic character set is at first designated to one of four
2324    graphic registers (G0 through G3), then these graphic registers are
2325    invoked to GL or GR.  These designations and invocations can be
2326    done independently.  The most common case is that G0 is invoked to
2327    GL, G1 is invoked to GR, and ASCII is designated to G0.  Usually
2328    these invocations and designations are omitted in encoded text.
2329    In a 7-bit environment, only GL can be used.
2330
2331    When a graphic character set of CHARS94 is invoked to GL, codes
2332    0x20 and 0x7F of the GL area work as control characters SPACE and
2333    DEL respectively, and codes 0xA0 and 0xFF of the GR area should not
2334    be used.
2335
2336    There are two ways of invocation: locking-shift and single-shift.
2337    With locking-shift, the invocation lasts until the next different
2338    invocation, whereas with single-shift, the invocation affects the
2339    following character only and doesn't affect the locking-shift
2340    state.  Invocations are done by the following control characters or
2341    escape sequences:
2342
2343    ----------------------------------------------------------------------
2344    abbrev  function                  cntrl escape seq   description
2345    ----------------------------------------------------------------------
2346    SI/LS0  (shift-in)                0x0F  none         invoke G0 into GL
2347    SO/LS1  (shift-out)               0x0E  none         invoke G1 into GL
2348    LS2     (locking-shift-2)         none  ESC 'n'      invoke G2 into GL
2349    LS3     (locking-shift-3)         none  ESC 'o'      invoke G3 into GL
2350    LS1R    (locking-shift-1 right)   none  ESC '~'      invoke G1 into GR (*)
2351    LS2R    (locking-shift-2 right)   none  ESC '}'      invoke G2 into GR (*)
2352    LS3R    (locking-shift-3 right)   none  ESC '|'      invoke G3 into GR (*)
2353    SS2     (single-shift-2)          0x8E  ESC 'N'      invoke G2 for one char
2354    SS3     (single-shift-3)          0x8F  ESC 'O'      invoke G3 for one char
2355    ----------------------------------------------------------------------
2356    (*) These are not used by any known coding system.
2357
2358    Control characters for these functions are defined by macros
2359    ISO_CODE_XXX in `coding.h'.
2360
2361    Designations are done by the following escape sequences:
2362    ----------------------------------------------------------------------
2363    escape sequence      description
2364    ----------------------------------------------------------------------
2365    ESC '(' <F>          designate DIMENSION1_CHARS94<F> to G0
2366    ESC ')' <F>          designate DIMENSION1_CHARS94<F> to G1
2367    ESC '*' <F>          designate DIMENSION1_CHARS94<F> to G2
2368    ESC '+' <F>          designate DIMENSION1_CHARS94<F> to G3
2369    ESC ',' <F>          designate DIMENSION1_CHARS96<F> to G0 (*)
2370    ESC '-' <F>          designate DIMENSION1_CHARS96<F> to G1
2371    ESC '.' <F>          designate DIMENSION1_CHARS96<F> to G2
2372    ESC '/' <F>          designate DIMENSION1_CHARS96<F> to G3
2373    ESC '$' '(' <F>      designate DIMENSION2_CHARS94<F> to G0 (**)
2374    ESC '$' ')' <F>      designate DIMENSION2_CHARS94<F> to G1
2375    ESC '$' '*' <F>      designate DIMENSION2_CHARS94<F> to G2
2376    ESC '$' '+' <F>      designate DIMENSION2_CHARS94<F> to G3
2377    ESC '$' ',' <F>      designate DIMENSION2_CHARS96<F> to G0 (*)
2378    ESC '$' '-' <F>      designate DIMENSION2_CHARS96<F> to G1
2379    ESC '$' '.' <F>      designate DIMENSION2_CHARS96<F> to G2
2380    ESC '$' '/' <F>      designate DIMENSION2_CHARS96<F> to G3
2381    ----------------------------------------------------------------------
2382
2383    In this list, "DIMENSION1_CHARS94<F>" means a graphic character set
2384    of dimension 1, chars 94, and final character <F>, etc...
2385
2386    Note (*): Although these designations are not allowed in ISO2022,
2387    Emacs accepts them on decoding, and produces them on encoding
2388    CHARS96 character sets in a coding system which is characterized as
2389    7-bit environment, non-locking-shift, and non-single-shift.
2390
2391    Note (**): If <F> is '@', 'A', or 'B', the intermediate character
2392    '(' must be omitted.  We refer to this as "short-form" hereafter.
2393
2394    Now you may notice that there are a lot of ways of encoding the
2395    same multilingual text in ISO2022.  Actually, there exist many
2396    coding systems such as Compound Text (used in X11's inter-client
2397    communication), ISO-2022-JP (used in Japanese Internet), ISO-2022-KR
2398    (used in Korean Internet), EUC (Extended UNIX Code, used in Asian
2399    localized platforms), and all of these are variants of ISO2022.
2400
2401    In addition to the above, Emacs handles two more kinds of escape
2402    sequences: ISO6429's direction specification and Emacs' private
2403    sequence for specifying character composition.
2404
2405    ISO6429's direction specification takes the following form:
2406         o CSI ']'      -- end of the current direction
2407         o CSI '0' ']'  -- end of the current direction
2408         o CSI '1' ']'  -- start of left-to-right text
2409         o CSI '2' ']'  -- start of right-to-left text
2410    The control character CSI (0x9B: control sequence introducer) is
2411    abbreviated to the escape sequence ESC '[' in a 7-bit environment.
2412
2413    Character composition specification takes the following form:
2414         o ESC '0' -- start relative composition
2415         o ESC '1' -- end composition
2416         o ESC '2' -- start rule-base composition (*)
2417         o ESC '3' -- start relative composition with alternate chars  (**)
2418         o ESC '4' -- start rule-base composition with alternate chars  (**)
2419   Since these are not standard escape sequences of any ISO standard,
2420   the use of them with these meanings is restricted to Emacs only.
2421
2422   (*) This form is used only in Emacs 20.7 and older versions,
2423   but newer versions can safely decode it.
2424   (**) This form is used only in Emacs 21.1 and newer versions,
2425   and older versions can't decode it.
2426
2427   Here's a list of example usages of these composition escape
2428   sequences (categorized by `enum composition_method').
2429
2430   COMPOSITION_RELATIVE:
2431         ESC 0 CHAR [ CHAR ] ESC 1
2432   COMPOSITION_WITH_RULE:
2433         ESC 2 CHAR [ RULE CHAR ] ESC 1
2434   COMPOSITION_WITH_ALTCHARS:
2435         ESC 3 ALTCHAR [ ALTCHAR ] ESC 0 CHAR [ CHAR ] ESC 1
2436   COMPOSITION_WITH_RULE_ALTCHARS:
2437         ESC 4 ALTCHAR [ RULE ALTCHAR ] ESC 0 CHAR [ CHAR ] ESC 1 */
2438
/* Class of each of the 256 possible byte values.  The ISO-2022 decoder
   (decode_coding_iso_2022) dispatches on iso_code_class[BYTE] for
   every byte it reads.  */
enum iso_code_class_type iso_code_class[256];
2440
/* Return nonzero if the charset whose ID is ID can be handled by
   CODING (a pointer to struct coding_system).

   CODING->safe_charsets is a byte table indexed by charset ID and
   valid for IDs 0..CODING->max_charset_id.  setup_iso_safe_charsets
   fills it with 255 and then stores a graphic register number for
   each supported charset, so an entry whose high bit is clear means
   "supported".

   A negative ID (the "no such charset" value of the ISO charset
   tables) is never safe; testing it here keeps us from indexing
   before the start of the table.  The entry is tested with a bit mask
   rather than `>= 0' so that the result does not depend on whether
   plain `char' is signed.

   ID is evaluated more than once.  */
#define SAFE_CHARSET_P(coding, id)                      \
  ((id) >= 0                                            \
   && (id) <= (coding)->max_charset_id                  \
   && ((coding)->safe_charsets[id] & 0x80) == 0)
2444
2445
/* Nonzero if CODING_ISO_INITIAL for graphic register 1 of the coding
   system stored in coding_categories[CATEGORY] is not negative.
   NOTE(review): presumably this means that some charset is initially
   designated to G1, so a shift-out has something to invoke -- confirm
   against the definition of CODING_ISO_INITIAL.  */
#define SHIFT_OUT_OK(category)                                          \
  (0 <= CODING_ISO_INITIAL (&coding_categories[category], 1))
2448
2449 static void
2450 setup_iso_safe_charsets (attrs)
2451      Lisp_Object attrs;
2452 {
2453   Lisp_Object charset_list, safe_charsets;
2454   Lisp_Object request;
2455   Lisp_Object reg_usage;
2456   Lisp_Object tail;
2457   int reg94, reg96;
2458   int flags = XINT (AREF (attrs, coding_attr_iso_flags));
2459   int max_charset_id;
2460
2461   charset_list = CODING_ATTR_CHARSET_LIST (attrs);
2462   if ((flags & CODING_ISO_FLAG_FULL_SUPPORT)
2463       && ! EQ (charset_list, Viso_2022_charset_list))
2464     {
2465       CODING_ATTR_CHARSET_LIST (attrs)
2466         = charset_list = Viso_2022_charset_list;
2467       ASET (attrs, coding_attr_safe_charsets, Qnil);
2468     }
2469
2470   if (STRINGP (AREF (attrs, coding_attr_safe_charsets)))
2471     return;
2472
2473   max_charset_id = 0;
2474   for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
2475     {
2476       int id = XINT (XCAR (tail));
2477       if (max_charset_id < id)
2478         max_charset_id = id;
2479     }
2480
2481   safe_charsets = Fmake_string (make_number (max_charset_id + 1),
2482                                 make_number (255));
2483   request = AREF (attrs, coding_attr_iso_request);
2484   reg_usage = AREF (attrs, coding_attr_iso_usage);
2485   reg94 = XINT (XCAR (reg_usage));
2486   reg96 = XINT (XCDR (reg_usage));
2487
2488   for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
2489     {
2490       Lisp_Object id;
2491       Lisp_Object reg;
2492       struct charset *charset;
2493
2494       id = XCAR (tail);
2495       charset = CHARSET_FROM_ID (XINT (id));
2496       reg = Fcdr (Fassq (id, request));
2497       if (! NILP (reg))
2498         SSET (safe_charsets, XINT (id), XINT (reg));
2499       else if (charset->iso_chars_96)
2500         {
2501           if (reg96 < 4)
2502             SSET (safe_charsets, XINT (id), reg96);
2503         }
2504       else
2505         {
2506           if (reg94 < 4)
2507             SSET (safe_charsets, XINT (id), reg94);
2508         }
2509     }
2510   ASET (attrs, coding_attr_safe_charsets, safe_charsets);
2511 }
2512
2513
2514 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
2515    Check if a text is encoded in one of ISO-2022 based codig systems.
2516    If it is, return 1, else return 0.  */
2517
2518 static int
2519 detect_coding_iso_2022 (coding, detect_info)
2520      struct coding_system *coding;
2521      struct coding_detection_info *detect_info;
2522 {
2523   const unsigned char *src = coding->source, *src_base = src;
2524   const unsigned char *src_end = coding->source + coding->src_bytes;
2525   int multibytep = coding->src_multibyte;
2526   int single_shifting = 0;
2527   int id;
2528   int c, c1;
2529   int consumed_chars = 0;
2530   int i;
2531   int rejected = 0;
2532   int found = 0;
2533
2534   detect_info->checked |= CATEGORY_MASK_ISO;
2535
2536   for (i = coding_category_iso_7; i <= coding_category_iso_8_else; i++)
2537     {
2538       struct coding_system *this = &(coding_categories[i]);
2539       Lisp_Object attrs, val;
2540
2541       attrs = CODING_ID_ATTRS (this->id);
2542       if (CODING_ISO_FLAGS (this) & CODING_ISO_FLAG_FULL_SUPPORT
2543           && ! EQ (CODING_ATTR_SAFE_CHARSETS (attrs), Viso_2022_charset_list))
2544         setup_iso_safe_charsets (attrs);
2545       val = CODING_ATTR_SAFE_CHARSETS (attrs);
2546       this->max_charset_id = SCHARS (val) - 1;
2547       this->safe_charsets = (char *) SDATA (val);
2548     }
2549
2550   /* A coding system of this category is always ASCII compatible.  */
2551   src += coding->head_ascii;
2552
2553   while (rejected != CATEGORY_MASK_ISO)
2554     {
2555       src_base = src;
2556       ONE_MORE_BYTE (c);
2557       switch (c)
2558         {
2559         case ISO_CODE_ESC:
2560           if (inhibit_iso_escape_detection)
2561             break;
2562           single_shifting = 0;
2563           ONE_MORE_BYTE (c);
2564           if (c >= '(' && c <= '/')
2565             {
2566               /* Designation sequence for a charset of dimension 1.  */
2567               ONE_MORE_BYTE (c1);
2568               if (c1 < ' ' || c1 >= 0x80
2569                   || (id = iso_charset_table[0][c >= ','][c1]) < 0)
2570                 /* Invalid designation sequence.  Just ignore.  */
2571                 break;
2572             }
2573           else if (c == '$')
2574             {
2575               /* Designation sequence for a charset of dimension 2.  */
2576               ONE_MORE_BYTE (c);
2577               if (c >= '@' && c <= 'B')
2578                 /* Designation for JISX0208.1978, GB2312, or JISX0208.  */
2579                 id = iso_charset_table[1][0][c];
2580               else if (c >= '(' && c <= '/')
2581                 {
2582                   ONE_MORE_BYTE (c1);
2583                   if (c1 < ' ' || c1 >= 0x80
2584                       || (id = iso_charset_table[1][c >= ','][c1]) < 0)
2585                     /* Invalid designation sequence.  Just ignore.  */
2586                     break;
2587                 }
2588               else
2589                 /* Invalid designation sequence.  Just ignore it.  */
2590                 break;
2591             }
2592           else if (c == 'N' || c == 'O')
2593             {
2594               /* ESC <Fe> for SS2 or SS3.  */
2595               single_shifting = 1;
2596               rejected |= CATEGORY_MASK_ISO_7BIT | CATEGORY_MASK_ISO_8BIT;
2597               break;
2598             }
2599           else if (c >= '0' && c <= '4')
2600             {
2601               /* ESC <Fp> for start/end composition.  */
2602               found |= CATEGORY_MASK_ISO;
2603               break;
2604             }
2605           else
2606             {
2607               /* Invalid escape sequence.  Just ignore it.  */
2608               break;
2609             }
2610
2611           /* We found a valid designation sequence for CHARSET.  */
2612           rejected |= CATEGORY_MASK_ISO_8BIT;
2613           if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_7],
2614                               id))
2615             found |= CATEGORY_MASK_ISO_7;
2616           else
2617             rejected |= CATEGORY_MASK_ISO_7;
2618           if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_7_tight],
2619                               id))
2620             found |= CATEGORY_MASK_ISO_7_TIGHT;
2621           else
2622             rejected |= CATEGORY_MASK_ISO_7_TIGHT;
2623           if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_7_else],
2624                               id))
2625             found |= CATEGORY_MASK_ISO_7_ELSE;
2626           else
2627             rejected |= CATEGORY_MASK_ISO_7_ELSE;
2628           if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_8_else],
2629                               id))
2630             found |= CATEGORY_MASK_ISO_8_ELSE;
2631           else
2632             rejected |= CATEGORY_MASK_ISO_8_ELSE;
2633           break;
2634
2635         case ISO_CODE_SO:
2636         case ISO_CODE_SI:
2637           /* Locking shift out/in.  */
2638           if (inhibit_iso_escape_detection)
2639             break;
2640           single_shifting = 0;
2641           rejected |= CATEGORY_MASK_ISO_7BIT | CATEGORY_MASK_ISO_8BIT;
2642           found |= CATEGORY_MASK_ISO_ELSE;
2643           break;
2644
2645         case ISO_CODE_CSI:
2646           /* Control sequence introducer.  */
2647           single_shifting = 0;
2648           rejected |= CATEGORY_MASK_ISO_7BIT | CATEGORY_MASK_ISO_7_ELSE;
2649           found |= CATEGORY_MASK_ISO_8_ELSE;
2650           goto check_extra_latin;
2651
2652         case ISO_CODE_SS2:
2653         case ISO_CODE_SS3:
2654           /* Single shift.   */
2655           if (inhibit_iso_escape_detection)
2656             break;
2657           single_shifting = 0;
2658           rejected |= CATEGORY_MASK_ISO_7BIT;
2659           if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_1])
2660               & CODING_ISO_FLAG_SINGLE_SHIFT)
2661             found |= CATEGORY_MASK_ISO_8_1, single_shifting = 1;
2662           if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_2])
2663               & CODING_ISO_FLAG_SINGLE_SHIFT)
2664             found |= CATEGORY_MASK_ISO_8_2, single_shifting = 1;
2665           if (single_shifting)
2666             break;
2667           goto check_extra_latin;
2668
2669         default:
2670           if (c < 0)
2671             continue;
2672           if (c < 0x80)
2673             {
2674               single_shifting = 0;
2675               break;
2676             }
2677           if (c >= 0xA0)
2678             {
2679               rejected |= CATEGORY_MASK_ISO_7BIT | CATEGORY_MASK_ISO_7_ELSE;
2680               found |= CATEGORY_MASK_ISO_8_1;
2681               /* Check the length of succeeding codes of the range
2682                  0xA0..0FF.  If the byte length is even, we include
2683                  CATEGORY_MASK_ISO_8_2 in `found'.  We can check this
2684                  only when we are not single shifting.  */
2685               if (! single_shifting
2686                   && ! (rejected & CATEGORY_MASK_ISO_8_2))
2687                 {
2688                   int i = 1;
2689                   while (src < src_end)
2690                     {
2691                       ONE_MORE_BYTE (c);
2692                       if (c < 0xA0)
2693                         break;
2694                       i++;
2695                     }
2696
2697                   if (i & 1 && src < src_end)
2698                     rejected |= CATEGORY_MASK_ISO_8_2;
2699                   else
2700                     found |= CATEGORY_MASK_ISO_8_2;
2701                 }
2702               break;
2703             }
2704         check_extra_latin:
2705           single_shifting = 0;
2706           if (! VECTORP (Vlatin_extra_code_table)
2707               || NILP (XVECTOR (Vlatin_extra_code_table)->contents[c]))
2708             {
2709               rejected = CATEGORY_MASK_ISO;
2710               break;
2711             }
2712           if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_1])
2713               & CODING_ISO_FLAG_LATIN_EXTRA)
2714             found |= CATEGORY_MASK_ISO_8_1;
2715           else
2716             rejected |= CATEGORY_MASK_ISO_8_1;
2717           rejected |= CATEGORY_MASK_ISO_8_2;
2718         }
2719     }
2720   detect_info->rejected |= CATEGORY_MASK_ISO;
2721   return 0;
2722
2723  no_more_source:
2724   detect_info->rejected |= rejected;
2725   detect_info->found |= (found & ~rejected);
2726   return 1;
2727 }
2728
2729
/* Record in CODING (a variable of the expansion site) the designation
   of a charset to graphic register REG.  The charset is looked up
   with ISO_CHARSET_TABLE from DIM, CHARS_96 and the final byte FINAL.

   If FINAL is outside '0'..127, or no charset is found, or the
   charset is not safe for CODING, the designation of REG is set to -2
   and CHARS_96 is set to -1.  CHARS_96 equal to -1 tells the caller
   that the escape sequence should be kept.

   Otherwise the charset ID is stored as the designation of REG, after
   replacing JISX0201-Roman by ASCII under CODING_ISO_FLAG_USE_ROMAN
   and JISX0208.1978 by JISX0208 under CODING_ISO_FLAG_USE_OLDJIS.

   CHARS_96 must be an lvalue.  */
#define DECODE_DESIGNATION(reg, dim, chars_96, final)                   \
  do {                                                                  \
    int id, prev;                                                       \
                                                                        \
    if ((final) >= '0' && (final) < 128                                 \
        && (id = ISO_CHARSET_TABLE (dim, chars_96, final)) >= 0         \
        && SAFE_CHARSET_P (coding, id))                                 \
      {                                                                 \
        prev = CODING_ISO_DESIGNATION (coding, reg);                    \
        if (id == charset_jisx0201_roman)                               \
          {                                                             \
            if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_USE_ROMAN)  \
              id = charset_ascii;                                       \
          }                                                             \
        else if (id == charset_jisx0208_1978)                           \
          {                                                             \
            if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_USE_OLDJIS) \
              id = charset_jisx0208;                                    \
          }                                                             \
        CODING_ISO_DESIGNATION (coding, reg) = id;                      \
        /* If there was an invalid designation to REG previously, and   \
           this one designates ASCII to REG, the sequence is kept       \
           too.  */                                                     \
        if (prev == -2 && id == charset_ascii)                          \
          chars_96 = -1;                                                \
      }                                                                 \
    else                                                                \
      {                                                                 \
        /* Invalid or unsupported designation.  */                      \
        CODING_ISO_DESIGNATION (coding, reg) = -2;                      \
        chars_96 = -1;                                                  \
      }                                                                 \
  } while (0)
2762
2763
/* If a composition is in progress, abandon it: set COMPOSITION_STATE
   to COMPOSING_NO and produce the characters collected so far in
   COMPONENTS as ordinary characters in CHARBUF, advancing CHAR_OFFSET
   by the number of characters produced.  Do nothing if no composition
   is in progress.  Jump to `no_more_source' if CHARBUF lacks room.

   For COMPOSITION_RELATIVE and COMPOSITION_WITH_ALTCHARS every
   element of COMPONENTS is a character.  For the other methods only
   the even-indexed elements are produced (the odd-indexed ones being
   composition rules), i.e. (COMPONENT_IDX + 1) / 2 characters.
   CHAR_OFFSET must advance by exactly that count; `COMPONENT_IDX / 2
   + 1' would be one too many whenever COMPONENT_IDX is even,
   including the case where nothing has been collected yet.

   This macro uses the local variables of the expansion site.  */
#define MAYBE_FINISH_COMPOSITION()                              \
  do {                                                          \
    int i;                                                      \
    if (composition_state == COMPOSING_NO)                      \
      break;                                                    \
    /* It is assured that we have enough room for producing     \
       characters stored in the table `components'.  */         \
    if (charbuf + component_idx > charbuf_end)                  \
      goto no_more_source;                                      \
    composition_state = COMPOSING_NO;                           \
    if (method == COMPOSITION_RELATIVE                          \
        || method == COMPOSITION_WITH_ALTCHARS)                 \
      {                                                         \
        for (i = 0; i < component_idx; i++)                     \
          *charbuf++ = components[i];                           \
        char_offset += component_idx;                           \
      }                                                         \
    else                                                        \
      {                                                         \
        for (i = 0; i < component_idx; i += 2)                  \
          *charbuf++ = components[i];                           \
        char_offset += (component_idx + 1) / 2;                 \
      }                                                         \
  } while (0)
2788
2789
/* Handle composition start sequence ESC 0, ESC 2, ESC 3, or ESC 4.
   ESC 0 : relative composition : ESC 0 CHAR ... ESC 1
   ESC 2 : rulebase composition : ESC 2 CHAR RULE CHAR RULE ... CHAR ESC 1
   ESC 3 : altchar composition :  ESC 3 CHAR ... ESC 0 CHAR ... ESC 1
   ESC 4 : alt&rule composition : ESC 4 CHAR RULE ... CHAR ESC 0 CHAR ... ESC 1

   C1 is the byte that followed ESC.  This macro uses the local
   variables of the expansion site.

   If C1 is '0' while COMPOSITION_STATE is COMPOSING_COMPONENT_RULE,
   this ESC 0 is the separator inside an ESC 3 / ESC 4 sequence:
   remember in COMPONENT_LEN how many components have been collected
   and switch to COMPOSING_CHAR.

   Otherwise a new composition starts.  Any composition in progress is
   finished first, and the rest of the source is searched for the
   terminating ESC 1.  If it is not there, jump to `invalid_code' when
   this is the last block of the source, else to `no_more_source'.  */

#define DECODE_COMPOSITION_START(c1)                                    \
  do {                                                                  \
    if (c1 == '0'                                                       \
        && composition_state == COMPOSING_COMPONENT_RULE)               \
      {                                                                 \
        component_len = component_idx;                                  \
        composition_state = COMPOSING_CHAR;                             \
      }                                                                 \
    else                                                                \
      {                                                                 \
        const unsigned char *p;                                         \
                                                                        \
        MAYBE_FINISH_COMPOSITION ();                                    \
        if (charbuf + MAX_COMPOSITION_COMPONENTS > charbuf_end)         \
          goto no_more_source;                                          \
        for (p = src; p < src_end - 1; p++)                             \
          if (*p == ISO_CODE_ESC && p[1] == '1')                        \
            break;                                                      \
        /* P stops before SRC_END - 1 only when ESC 1 was found.  Use   \
           `>=', not `==': when no source byte is left, P starts at     \
           SRC_END, which is already beyond SRC_END - 1.  */            \
        if (p >= src_end - 1)                                           \
          {                                                             \
            if (coding->mode & CODING_MODE_LAST_BLOCK)                  \
              goto invalid_code;                                        \
            goto no_more_source;                                        \
          }                                                             \
                                                                        \
        /* This is surely the start of a composition.  */               \
        method = (c1 == '0' ? COMPOSITION_RELATIVE                      \
                  : c1 == '2' ? COMPOSITION_WITH_RULE                   \
                  : c1 == '3' ? COMPOSITION_WITH_ALTCHARS               \
                  : COMPOSITION_WITH_RULE_ALTCHARS);                    \
        composition_state = (c1 <= '2' ? COMPOSING_CHAR                 \
                             : COMPOSING_COMPONENT_CHAR);               \
        component_idx = component_len = 0;                              \
      }                                                                 \
  } while (0)
2832
2833
/* Handle composition end sequence ESC 1.

   Produce in CHARBUF the annotation written by ADD_COMPOSITION_DATA
   for the composition just completed, followed, unless the method is
   COMPOSITION_RELATIVE, by the composition components, and then by
   the composed characters themselves.  CHAR_OFFSET is advanced once
   per composed character.  When components are produced, the first
   element of the annotation is overwritten with the negated number of
   elements written so far.  Finally mark CODING as annotated and
   leave the composing state.

   This macro uses the local variables of the expansion site.  */

#define DECODE_COMPOSITION_END()                                        \
  do {                                                                  \
    int nchars, i;                                                      \
    int *saved_charbuf = charbuf;                                       \
                                                                        \
    /* Number of characters covered by the composition.  */             \
    if (component_len > 0)                                              \
      nchars = component_idx - component_len;                           \
    else if (method == COMPOSITION_RELATIVE)                            \
      nchars = component_idx;                                           \
    else                                                                \
      nchars = (component_idx + 1) / 2;                                 \
                                                                        \
    ADD_COMPOSITION_DATA (charbuf, nchars, method);                     \
    if (method != COMPOSITION_RELATIVE)                                 \
      {                                                                 \
        /* The components are everything collected when no ESC 0        \
           separator was seen, else the part before the separator.  */  \
        int ncomponents = (component_len == 0                           \
                           ? component_idx : component_len);            \
                                                                        \
        i = 0;                                                          \
        while (i < ncomponents)                                         \
          *charbuf++ = components[i++];                                 \
        *saved_charbuf = saved_charbuf - charbuf;                       \
      }                                                                 \
    /* With COMPOSITION_WITH_RULE the characters are the even-indexed   \
       components; otherwise they are what follows the components.  */  \
    if (method == COMPOSITION_WITH_RULE)                                \
      {                                                                 \
        i = 0;                                                          \
        while (i < component_idx)                                       \
          {                                                             \
            *charbuf++ = components[i];                                 \
            i += 2;                                                     \
            char_offset++;                                              \
          }                                                             \
      }                                                                 \
    else                                                                \
      {                                                                 \
        i = component_len;                                              \
        while (i < component_idx)                                       \
          {                                                             \
            *charbuf++ = components[i];                                 \
            i++;                                                        \
            char_offset++;                                              \
          }                                                             \
      }                                                                 \
    coding->annotated = 1;                                              \
    composition_state = COMPOSING_NO;                                   \
  } while (0)
2864
2865
/* Decode a composition rule from the byte C1 (and maybe one more byte
   read from SRC into the variable C2 of the expansion site), and
   store the rule, encoded by COMPOSITION_ENCODE_RULE, back into C1.

   With V = C1 - 32:
     V < 81        old format (before ver.21): one byte holding two
                   reference points, V / 9 and V % 9, where the value
                   4 stands for reference point 10;
     81 <= V < 93  new format (after ver.21): V - 81 is the first
                   reference point and the next byte minus 32 is the
                   second;
     otherwise     C1 is set to 0.

   C1 must be an lvalue.  */

#define DECODE_COMPOSITION_RULE(c1)                                     \
  do {                                                                  \
    (c1) -= 32;                                                         \
    if ((c1) >= 93)                                                     \
      (c1) = 0;                                                         \
    else if ((c1) >= 81)        /* new format (after ver.21) */         \
      {                                                                 \
        ONE_MORE_BYTE (c2);                                             \
        (c1) = COMPOSITION_ENCODE_RULE ((c1) - 81, c2 - 32);            \
      }                                                                 \
    else                        /* old format (before ver.21) */        \
      {                                                                 \
        int gref = (c1) / 9;                                            \
        int nref = (c1) % 9;                                            \
                                                                        \
        gref = (gref == 4 ? 10 : gref);                                 \
        nref = (nref == 4 ? 10 : nref);                                 \
        (c1) = COMPOSITION_ENCODE_RULE (gref, nref);                    \
      }                                                                 \
  } while (0)
2889
2890
2891 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".  */
2892
2893 static void
2894 decode_coding_iso_2022 (coding)
2895      struct coding_system *coding;
2896 {
2897   const unsigned char *src = coding->source + coding->consumed;
2898   const unsigned char *src_end = coding->source + coding->src_bytes;
2899   const unsigned char *src_base;
2900   int *charbuf = coding->charbuf + coding->charbuf_used;
2901   int *charbuf_end
2902     = coding->charbuf + coding->charbuf_size - 4 - MAX_ANNOTATION_LENGTH;
2903   int consumed_chars = 0, consumed_chars_base;
2904   int multibytep = coding->src_multibyte;
2905   /* Charsets invoked to graphic plane 0 and 1 respectively.  */
2906   int charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
2907   int charset_id_1 = CODING_ISO_INVOKED_CHARSET (coding, 1);
2908   int charset_id_2, charset_id_3;
2909   struct charset *charset;
2910   int c;
2911   /* For handling composition sequence.  */
2912 #define COMPOSING_NO                    0
2913 #define COMPOSING_CHAR                  1
2914 #define COMPOSING_RULE                  2
2915 #define COMPOSING_COMPONENT_CHAR        3
2916 #define COMPOSING_COMPONENT_RULE        4
2917
2918   int composition_state = COMPOSING_NO;
2919   enum composition_method method;
2920   int components[MAX_COMPOSITION_COMPONENTS * 2 + 1];
2921   int component_idx;
2922   int component_len;
2923   Lisp_Object attrs, charset_list;
2924   int char_offset = coding->produced_char;
2925   int last_offset = char_offset;
2926   int last_id = charset_ascii;
2927
2928   CODING_GET_INFO (coding, attrs, charset_list);
2929   setup_iso_safe_charsets (attrs);
2930
2931   while (1)
2932     {
2933       int c1, c2;
2934
2935       src_base = src;
2936       consumed_chars_base = consumed_chars;
2937
2938       if (charbuf >= charbuf_end)
2939         break;
2940
2941       ONE_MORE_BYTE (c1);
2942       if (c1 < 0)
2943         goto invalid_code;
2944
2945       /* We produce at most one character.  */
2946       switch (iso_code_class [c1])
2947         {
2948         case ISO_0x20_or_0x7F:
2949           if (composition_state != COMPOSING_NO)
2950             {
2951               if (composition_state == COMPOSING_RULE
2952                   || composition_state == COMPOSING_COMPONENT_RULE)
2953                 {
2954                   DECODE_COMPOSITION_RULE (c1);
2955                   components[component_idx++] = c1;
2956                   composition_state--;
2957                   continue;
2958                 }
2959             }
2960           if (charset_id_0 < 0
2961               || ! CHARSET_ISO_CHARS_96 (CHARSET_FROM_ID (charset_id_0)))
2962             /* This is SPACE or DEL.  */
2963             charset = CHARSET_FROM_ID (charset_ascii);
2964           else
2965             charset = CHARSET_FROM_ID (charset_id_0);
2966           break;
2967
2968         case ISO_graphic_plane_0:
2969           if (composition_state != COMPOSING_NO)
2970             {
2971               if (composition_state == COMPOSING_RULE
2972                   || composition_state == COMPOSING_COMPONENT_RULE)
2973                 {
2974                   DECODE_COMPOSITION_RULE (c1);
2975                   components[component_idx++] = c1;
2976                   composition_state--;
2977                   continue;
2978                 }
2979             }
2980           if (charset_id_0 < 0)
2981             charset = CHARSET_FROM_ID (charset_ascii);
2982           else
2983             charset = CHARSET_FROM_ID (charset_id_0);
2984           break;
2985
2986         case ISO_0xA0_or_0xFF:
2987           if (charset_id_1 < 0
2988               || ! CHARSET_ISO_CHARS_96 (CHARSET_FROM_ID (charset_id_1))
2989               || CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS)
2990             goto invalid_code;
2991           /* This is a graphic character, we fall down ... */
2992
2993         case ISO_graphic_plane_1:
2994           if (charset_id_1 < 0)
2995             goto invalid_code;
2996           charset = CHARSET_FROM_ID (charset_id_1);
2997           break;
2998
2999         case ISO_control_0:
3000           MAYBE_FINISH_COMPOSITION ();
3001           charset = CHARSET_FROM_ID (charset_ascii);
3002           break;
3003
3004         case ISO_control_1:
3005           MAYBE_FINISH_COMPOSITION ();
3006           goto invalid_code;
3007
3008         case ISO_shift_out:
3009           if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LOCKING_SHIFT)
3010               || CODING_ISO_DESIGNATION (coding, 1) < 0)
3011             goto invalid_code;
3012           CODING_ISO_INVOCATION (coding, 0) = 1;
3013           charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
3014           continue;
3015
3016         case ISO_shift_in:
3017           if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LOCKING_SHIFT))
3018             goto invalid_code;
3019           CODING_ISO_INVOCATION (coding, 0) = 0;
3020           charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
3021           continue;
3022
3023         case ISO_single_shift_2_7:
3024         case ISO_single_shift_2:
3025           if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT))
3026             goto invalid_code;
3027           /* SS2 is handled as an escape sequence of ESC 'N' */
3028           c1 = 'N';
3029           goto label_escape_sequence;
3030
3031         case ISO_single_shift_3:
3032           if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT))
3033             goto invalid_code;
3034           /* SS3 is handled as an escape sequence of ESC 'O' */
3035           c1 = 'O';
3036           goto label_escape_sequence;
3037
3038         case ISO_control_sequence_introducer:
3039           /* CSI is handled as an escape sequence of ESC '[' ...  */
3040           c1 = '[';
3041           goto label_escape_sequence;
3042
3043         case ISO_escape:
3044           ONE_MORE_BYTE (c1);
3045         label_escape_sequence:
3046           /* Escape sequences handled here are invocation,
3047              designation, direction specification, and character
3048              composition specification.  */
3049           switch (c1)
3050             {
3051             case '&':           /* revision of following character set */
3052               ONE_MORE_BYTE (c1);
3053               if (!(c1 >= '@' && c1 <= '~'))
3054                 goto invalid_code;
3055               ONE_MORE_BYTE (c1);
3056               if (c1 != ISO_CODE_ESC)
3057                 goto invalid_code;
3058               ONE_MORE_BYTE (c1);
3059               goto label_escape_sequence;
3060
3061             case '$':           /* designation of 2-byte character set */
3062               if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DESIGNATION))
3063                 goto invalid_code;
3064               {
3065                 int reg, chars96;
3066
3067                 ONE_MORE_BYTE (c1);
3068                 if (c1 >= '@' && c1 <= 'B')
3069                   {     /* designation of JISX0208.1978, GB2312.1980,
3070                            or JISX0208.1980 */
3071                     reg = 0, chars96 = 0;
3072                   }
3073                 else if (c1 >= 0x28 && c1 <= 0x2B)
3074                   { /* designation of DIMENSION2_CHARS94 character set */
3075                     reg = c1 - 0x28, chars96 = 0;
3076                     ONE_MORE_BYTE (c1);
3077                   }
3078                 else if (c1 >= 0x2C && c1 <= 0x2F)
3079                   { /* designation of DIMENSION2_CHARS96 character set */
3080                     reg = c1 - 0x2C, chars96 = 1;
3081                     ONE_MORE_BYTE (c1);
3082                   }
3083                 else
3084                   goto invalid_code;
3085                 DECODE_DESIGNATION (reg, 2, chars96, c1);
3086                 /* We must update these variables now.  */
3087                 if (reg == 0)
3088                   charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
3089                 else if (reg == 1)
3090                   charset_id_1 = CODING_ISO_INVOKED_CHARSET (coding, 1);
3091                 if (chars96 < 0)
3092                   goto invalid_code;
3093               }
3094               continue;
3095
3096             case 'n':           /* invocation of locking-shift-2 */
3097               if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LOCKING_SHIFT)
3098                   || CODING_ISO_DESIGNATION (coding, 2) < 0)
3099                 goto invalid_code;
3100               CODING_ISO_INVOCATION (coding, 0) = 2;
3101               charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
3102               continue;
3103
3104             case 'o':           /* invocation of locking-shift-3 */
3105               if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LOCKING_SHIFT)
3106                   || CODING_ISO_DESIGNATION (coding, 3) < 0)
3107                 goto invalid_code;
3108               CODING_ISO_INVOCATION (coding, 0) = 3;
3109               charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
3110               continue;
3111
3112             case 'N':           /* invocation of single-shift-2 */
3113               if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT)
3114                   || CODING_ISO_DESIGNATION (coding, 2) < 0)
3115                 goto invalid_code;
3116               charset_id_2 = CODING_ISO_DESIGNATION (coding, 2);
3117               if (charset_id_2 < 0)
3118                 charset = CHARSET_FROM_ID (charset_ascii);
3119               else
3120                 charset = CHARSET_FROM_ID (charset_id_2);
3121               ONE_MORE_BYTE (c1);
3122               if (c1 < 0x20 || (c1 >= 0x80 && c1 < 0xA0))
3123                 goto invalid_code;
3124               break;
3125
3126             case 'O':           /* invocation of single-shift-3 */
3127               if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT)
3128                   || CODING_ISO_DESIGNATION (coding, 3) < 0)
3129                 goto invalid_code;
3130               charset_id_3 = CODING_ISO_DESIGNATION (coding, 3);
3131               if (charset_id_3 < 0)
3132                 charset = CHARSET_FROM_ID (charset_ascii);
3133               else
3134                 charset = CHARSET_FROM_ID (charset_id_3);
3135               ONE_MORE_BYTE (c1);
3136               if (c1 < 0x20 || (c1 >= 0x80 && c1 < 0xA0))
3137                 goto invalid_code;
3138               break;
3139
3140             case '0': case '2': case '3': case '4': /* start composition */
3141               if (! (coding->common_flags & CODING_ANNOTATE_COMPOSITION_MASK))
3142                 goto invalid_code;
3143               DECODE_COMPOSITION_START (c1);
3144               continue;
3145
3146             case '1':           /* end composition */
3147               if (composition_state == COMPOSING_NO)
3148                 goto invalid_code;
3149               DECODE_COMPOSITION_END ();
3150               continue;
3151
3152             case '[':           /* specification of direction */
3153               if (! CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DIRECTION)
3154                 goto invalid_code;
3155               /* For the moment, nested direction is not supported.
3156                  So, `coding->mode & CODING_MODE_DIRECTION' zero means
3157                  left-to-right, and nonzero means right-to-left.  */
3158               ONE_MORE_BYTE (c1);
3159               switch (c1)
3160                 {
3161                 case ']':       /* end of the current direction */
3162                   coding->mode &= ~CODING_MODE_DIRECTION;
3163
3164                 case '0':       /* end of the current direction */
3165                 case '1':       /* start of left-to-right direction */
3166                   ONE_MORE_BYTE (c1);
3167                   if (c1 == ']')
3168                     coding->mode &= ~CODING_MODE_DIRECTION;
3169                   else
3170                     goto invalid_code;
3171                   break;
3172
3173                 case '2':       /* start of right-to-left direction */
3174                   ONE_MORE_BYTE (c1);
3175                   if (c1 == ']')
3176                     coding->mode |= CODING_MODE_DIRECTION;
3177                   else
3178                     goto invalid_code;
3179                   break;
3180
3181                 default:
3182                   goto invalid_code;
3183                 }
3184               continue;
3185
3186             case '%':
3187               ONE_MORE_BYTE (c1);
3188               if (c1 == '/')
3189                 {
3190                   /* CTEXT extended segment:
3191                      ESC % / [0-4] M L --ENCODING-NAME-- \002 --BYTES--
3192                      We keep these bytes as is for the moment.
3193                      They may be decoded by post-read-conversion.  */
3194                   int dim, M, L;
3195                   int size;
3196
3197                   ONE_MORE_BYTE (dim);
3198                   ONE_MORE_BYTE (M);
3199                   ONE_MORE_BYTE (L);
3200                   size = ((M - 128) * 128) + (L - 128);
3201                   if (charbuf + 8 + size > charbuf_end)
3202                     goto break_loop;
3203                   *charbuf++ = ISO_CODE_ESC;
3204                   *charbuf++ = '%';
3205                   *charbuf++ = '/';
3206                   *charbuf++ = dim;
3207                   *charbuf++ = BYTE8_TO_CHAR (M);
3208                   *charbuf++ = BYTE8_TO_CHAR (L);
3209                   while (size-- > 0)
3210                     {
3211                       ONE_MORE_BYTE (c1);
3212                       *charbuf++ = ASCII_BYTE_P (c1) ? c1 : BYTE8_TO_CHAR (c1);
3213                     }
3214                 }
3215               else if (c1 == 'G')
3216                 {
3217                   /* XFree86 extension for embedding UTF-8 in CTEXT:
3218                      ESC % G --UTF-8-BYTES-- ESC % @
3219                      We keep these bytes as is for the moment.
3220                      They may be decoded by post-read-conversion.  */
3221                   int *p = charbuf;
3222
3223                   if (p + 6 > charbuf_end)
3224                     goto break_loop;
3225                   *p++ = ISO_CODE_ESC;
3226                   *p++ = '%';
3227                   *p++ = 'G';
3228                   while (p < charbuf_end)
3229                     {
3230                       ONE_MORE_BYTE (c1);
3231                       if (c1 == ISO_CODE_ESC
3232                           && src + 1 < src_end
3233                           && src[0] == '%'
3234                           && src[1] == '@')
3235                         {
3236                           src += 2;
3237                           break;
3238                         }
3239                       *p++ = ASCII_BYTE_P (c1) ? c1 : BYTE8_TO_CHAR (c1);
3240                     }
3241                   if (p + 3 > charbuf_end)
3242                     goto break_loop;
3243                   *p++ = ISO_CODE_ESC;
3244                   *p++ = '%';
3245                   *p++ = '@';
3246                   charbuf = p;
3247                 }
3248               else
3249                 goto invalid_code;
3250               continue;
3251               break;
3252
3253             default:
3254               if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DESIGNATION))
3255                 goto invalid_code;
3256               {
3257                 int reg, chars96;
3258
3259                 if (c1 >= 0x28 && c1 <= 0x2B)
3260                   { /* designation of DIMENSION1_CHARS94 character set */
3261                     reg = c1 - 0x28, chars96 = 0;
3262                     ONE_MORE_BYTE (c1);
3263                   }
3264                 else if (c1 >= 0x2C && c1 <= 0x2F)
3265                   { /* designation of DIMENSION1_CHARS96 character set */
3266                     reg = c1 - 0x2C, chars96 = 1;
3267                     ONE_MORE_BYTE (c1);
3268                   }
3269                 else
3270                   goto invalid_code;
3271                 DECODE_DESIGNATION (reg, 1, chars96, c1);
3272                 /* We must update these variables now.  */
3273                 if (reg == 0)
3274                   charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
3275                 else if (reg == 1)
3276                   charset_id_1 = CODING_ISO_INVOKED_CHARSET (coding, 1);
3277                 if (chars96 < 0)
3278                   goto invalid_code;
3279               }
3280               continue;
3281             }
3282         }
3283
3284       if (charset->id != charset_ascii
3285           && last_id != charset->id)
3286         {
3287           if (last_id != charset_ascii)
3288             ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
3289           last_id = charset->id;
3290           last_offset = char_offset;
3291         }
3292
3293       /* Now we know CHARSET and 1st position code C1 of a character.
3294          Produce a decoded character while getting 2nd position code
3295          C2 if necessary.  */
3296       c1 &= 0x7F;
3297       if (CHARSET_DIMENSION (charset) > 1)
3298         {
3299           ONE_MORE_BYTE (c2);
3300           if (c2 < 0x20 || (c2 >= 0x80 && c2 < 0xA0))
3301             /* C2 is not in a valid range.  */
3302             goto invalid_code;
3303           c1 = (c1 << 8) | (c2 & 0x7F);
3304           if (CHARSET_DIMENSION (charset) > 2)
3305             {
3306               ONE_MORE_BYTE (c2);
3307               if (c2 < 0x20 || (c2 >= 0x80 && c2 < 0xA0))
3308                 /* C2 is not in a valid range.  */
3309                 goto invalid_code;
3310               c1 = (c1 << 8) | (c2 & 0x7F);
3311             }
3312         }
3313
3314       CODING_DECODE_CHAR (coding, src, src_base, src_end, charset, c1, c);
3315       if (c < 0)
3316         {
3317           MAYBE_FINISH_COMPOSITION ();
3318           for (; src_base < src; src_base++, char_offset++)
3319             {
3320               if (ASCII_BYTE_P (*src_base))
3321                 *charbuf++ = *src_base;
3322               else
3323                 *charbuf++ = BYTE8_TO_CHAR (*src_base);
3324             }
3325         }
3326       else if (composition_state == COMPOSING_NO)
3327         {
3328           *charbuf++ = c;
3329           char_offset++;
3330         }
3331       else
3332         {
3333           components[component_idx++] = c;
3334           if (method == COMPOSITION_WITH_RULE
3335               || (method == COMPOSITION_WITH_RULE_ALTCHARS
3336                   && composition_state == COMPOSING_COMPONENT_CHAR))
3337             composition_state++;
3338         }
3339       continue;
3340
3341     invalid_code:
3342       MAYBE_FINISH_COMPOSITION ();
3343       src = src_base;
3344       consumed_chars = consumed_chars_base;
3345       ONE_MORE_BYTE (c);
3346       *charbuf++ = c < 0 ? -c : ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
3347       char_offset++;
3348       coding->errors++;
3349       continue;
3350
3351     break_loop:
3352       break;
3353     }
3354
3355  no_more_source:
3356   if (last_id != charset_ascii)
3357     ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
3358   coding->consumed_char += consumed_chars_base;
3359   coding->consumed = src_base - coding->source;
3360   coding->charbuf_used = charbuf - coding->charbuf;
3361 }
3362
3363
3364 /* ISO2022 encoding stuff.  */
3365
3366 /*
3367    It is not enough to say just "ISO2022" on encoding, we have to
3368    specify more details.  In Emacs, each coding system of ISO2022
3369    variant has the following specifications:
3370         1. Initial designation to G0 thru G3.
3371         2. Allows short-form designation?
3372         3. ASCII should be designated to G0 before control characters?
3373         4. ASCII should be designated to G0 at end of line?
3374         5. 7-bit environment or 8-bit environment?
3375         6. Use locking-shift?
3376         7. Use Single-shift?
3377    And the following two are only for Japanese:
3378         8. Use ASCII in place of JIS0201-1976-Roman?
3379         9. Use JISX0208-1983 in place of JISX0208-1978?
3380    These specifications are encoded in CODING_ISO_FLAGS (coding) as flag bits
3381    defined by macros CODING_ISO_FLAG_XXX.  See `coding.h' for more
3382    details.
3383 */
3384
/* Emit at DST the escape sequence that designates CHARSET to graphic
   register REG (an index 0..3 into the intermediate-character tables
   below), advancing DST, and record the new designation in CODING.

   The sequence is, in order:
     - an optional revision prefix `ESC & <'@' + revision>', produced
       only when CODING has CODING_ISO_FLAG_REVISION set and the
       revision of CHARSET is non-negative;
     - ESC;
     - `$' when the dimension of CHARSET is not 1;
     - the intermediate character selecting REG: one of `()*+' for a
       94-charset, one of `,-./' for a 96-charset;
     - the final character of CHARSET.

   The intermediate character is left out (short-form designation)
   only for a multi-dimension 94-charset designated to register 0
   whose final character is '@', 'A', or 'B', and only when CODING
   does not have CODING_ISO_FLAG_LONG_FORM set.

   Output goes through the EMIT_* macros, so whatever variables those
   expect must be in scope where this macro is expanded.  CHARSET,
   REG and CODING are each evaluated more than once.  */

#define ENCODE_DESIGNATION(charset, reg, coding)                        \
  do {                                                                  \
    unsigned char final_char = CHARSET_ISO_FINAL (charset);             \
    int is_chars96 = CHARSET_ISO_CHARS_96 (charset) ? 1 : 0;            \
    const char *intermediates = is_chars96 ? ",-./" : "()*+";           \
    int revision = -1;                                                  \
                                                                        \
    if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_REVISION)           \
      revision = CHARSET_ISO_REVISION (charset);                        \
    if (revision >= 0)                                                  \
      {                                                                 \
        /* Revision prefix: ESC & <'@' + revision>.  */                 \
        EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, '&');                       \
        EMIT_ONE_BYTE ('@' + revision);                                 \
      }                                                                 \
                                                                        \
    EMIT_ONE_ASCII_BYTE (ISO_CODE_ESC);                                 \
    if (CHARSET_DIMENSION (charset) != 1)                               \
      EMIT_ONE_ASCII_BYTE ('$');                                        \
    /* Only the short-form case skips the intermediate character.  */   \
    if (CHARSET_DIMENSION (charset) == 1                                \
        || is_chars96                                                   \
        || (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LONG_FORM)      \
        || reg != 0                                                     \
        || final_char < '@' || final_char > 'B')                        \
      EMIT_ONE_ASCII_BYTE (intermediates[reg]);                         \
    EMIT_ONE_ASCII_BYTE (final_char);                                   \
                                                                        \
    CODING_ISO_DESIGNATION (coding, reg) = CHARSET_ID (charset);        \
  } while (0)
3432
3433
3434 /* The following two macros produce codes (control character or escape
3435    sequence) for ISO2022 single-shift functions (single-shift-2 and
3436    single-shift-3).  */
3437
/* Emit the single-shift-2 function for `coding': the single byte
   ISO_CODE_SS2, or the two-byte sequence ESC 'N' when
   CODING_ISO_FLAG_SEVEN_BITS is set.  Afterwards flag `coding' as
   being in single-shifting state.  `coding' and whatever the EMIT_*
   macros use must be in scope at the expansion site.  */
#define ENCODE_SINGLE_SHIFT_2                                           \
  do {                                                                  \
    if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS))     \
      {                                                                 \
        EMIT_ONE_BYTE (ISO_CODE_SS2);                                   \
      }                                                                 \
    else                                                                \
      {                                                                 \
        EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'N');                       \
      }                                                                 \
    CODING_ISO_SINGLE_SHIFTING (coding) = 1;                            \
  } while (0)
3446
3447
/* Emit the single-shift-3 function for `coding': the single byte
   ISO_CODE_SS3, or the two-byte sequence ESC 'O' when
   CODING_ISO_FLAG_SEVEN_BITS is set.  Afterwards flag `coding' as
   being in single-shifting state.  `coding' and whatever the EMIT_*
   macros use must be in scope at the expansion site.  */
#define ENCODE_SINGLE_SHIFT_3                                           \
  do {                                                                  \
    if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS))     \
      {                                                                 \
        EMIT_ONE_BYTE (ISO_CODE_SS3);                                   \
      }                                                                 \
    else                                                                \
      {                                                                 \
        EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'O');                       \
      }                                                                 \
    CODING_ISO_SINGLE_SHIFTING (coding) = 1;                            \
  } while (0)
3456
3457
3458 /* The following four macros produce codes (control character or
3459    escape sequence) for ISO2022 locking-shift functions (shift-in,
3460    shift-out, locking-shift-2, and locking-shift-3).  */
3461
/* Emit the shift-in control ISO_CODE_SI, then record in `coding' that
   graphic register 0 is the one invoked to graphic plane 0.  */
#define ENCODE_SHIFT_IN                                                 \
  do {                                                                  \
    EMIT_ONE_ASCII_BYTE (ISO_CODE_SI);                                  \
    CODING_ISO_INVOCATION (coding, 0) = 0;                              \
  } while (0)
3467
3468
/* Emit the shift-out control ISO_CODE_SO, then record in `coding' that
   graphic register 1 is the one invoked to graphic plane 0.  */
#define ENCODE_SHIFT_OUT                                                \
  do {                                                                  \
    EMIT_ONE_ASCII_BYTE (ISO_CODE_SO);                                  \
    CODING_ISO_INVOCATION (coding, 0) = 1;                              \
  } while (0)
3474
3475
/* Emit the locking-shift-2 sequence ESC 'n', then record in `coding'
   that graphic register 2 is the one invoked to graphic plane 0.  */
#define ENCODE_LOCKING_SHIFT_2                                          \
  do {                                                                  \
    EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'n');                           \
    CODING_ISO_INVOCATION (coding, 0) = 2;                              \
  } while (0)
3481
3482
/* Emit the locking-shift-3 sequence ESC 'o', then record in `coding'
   that graphic register 3 is the one invoked to graphic plane 0.

   The final byte must be 'o': ESC 'n' is locking-shift-2, and the
   ISO-2022 decoder in this file invokes register 3 only on ESC 'o'.
   Emitting 'n' here would make a reader invoke register 2 instead.  */
#define ENCODE_LOCKING_SHIFT_3                                          \
  do {                                                                  \
    EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'o');                           \
    CODING_ISO_INVOCATION (coding, 0) = 3;                              \
  } while (0)
3488
3489
/* Produce the code for a DIMENSION1 character whose character set is
   CHARSET and whose position code is C1.  Designation and invocation
   sequences are produced first when they are needed.

   CHARSET must be a modifiable lvalue: when `coding' has
   CODING_ISO_FLAG_USE_ROMAN set and the id of CHARSET is
   charset_ascii, CHARSET is replaced by the charset whose id is
   charset_jisx0201_roman.

   Each pass of the loop does exactly one of the following:
     - `coding' is in single-shifting state: emit C1 (masked with 0x7F
       if CODING_ISO_FLAG_SEVEN_BITS is set, otherwise with bit 0x80
       set), clear the single-shifting state, and stop;
     - CHARSET is the charset invoked to graphic plane 0: emit
       C1 & 0x7F and stop;
     - CHARSET is the charset invoked to graphic plane 1: emit
       C1 | 0x80 and stop;
     - otherwise call encode_invocation_designation and go around
       again.

   `coding', `dst' and `produced_chars' must be in scope at the
   expansion site.  C1 is parenthesized wherever it is used so that an
   argument such as `a | b' is masked as a whole.  */

#define ENCODE_ISO_CHARACTER_DIMENSION1(charset, c1)                    \
  do {                                                                  \
    int id = CHARSET_ID (charset);                                      \
                                                                        \
    if ((CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_USE_ROMAN)         \
        && id == charset_ascii)                                         \
      {                                                                 \
        id = charset_jisx0201_roman;                                    \
        charset = CHARSET_FROM_ID (id);                                 \
      }                                                                 \
                                                                        \
    if (CODING_ISO_SINGLE_SHIFTING (coding))                            \
      {                                                                 \
        if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS)     \
          EMIT_ONE_ASCII_BYTE ((c1) & 0x7F);                            \
        else                                                            \
          EMIT_ONE_BYTE ((c1) | 0x80);                                  \
        CODING_ISO_SINGLE_SHIFTING (coding) = 0;                        \
        break;                                                          \
      }                                                                 \
    else if (id == CODING_ISO_INVOKED_CHARSET (coding, 0))              \
      {                                                                 \
        EMIT_ONE_ASCII_BYTE ((c1) & 0x7F);                              \
        break;                                                          \
      }                                                                 \
    else if (id == CODING_ISO_INVOKED_CHARSET (coding, 1))              \
      {                                                                 \
        EMIT_ONE_BYTE ((c1) | 0x80);                                    \
        break;                                                          \
      }                                                                 \
    else                                                                \
      /* Since CHARSET is not yet invoked to any graphic planes, we     \
         must invoke it, or, at first, designate it to some graphic     \
         register.  Then repeat the loop to actually produce the        \
         character.  */                                                 \
      dst = encode_invocation_designation (charset, coding, dst,        \
                                           &produced_chars);            \
  } while (1)
3532
3533
/* Produce the codes for a DIMENSION2 character whose character set is
   CHARSET and whose position codes are C1 and C2.  Designation and
   invocation codes are produced first when they are needed.

   CHARSET must be a modifiable lvalue: when `coding' has
   CODING_ISO_FLAG_USE_OLDJIS set and the id of CHARSET is
   charset_jisx0208, CHARSET is replaced by the charset whose id is
   charset_jisx0208_1978.

   Each pass of the loop either emits the two bytes and stops (in
   single-shifting state, or when CHARSET is invoked to graphic plane
   0 or 1), or calls encode_invocation_designation and goes around
   again.  Bytes are emitted masked with 0x7F for plane 0 and for
   7-bit single-shifting, and with bit 0x80 set otherwise.

   `coding', `dst' and `produced_chars' must be in scope at the
   expansion site.  */

#define ENCODE_ISO_CHARACTER_DIMENSION2(charset, c1, c2)                \
  do {                                                                  \
    int id = CHARSET_ID (charset);                                      \
                                                                        \
    if (id == charset_jisx0208                                          \
        && (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_USE_OLDJIS))    \
      {                                                                 \
        id = charset_jisx0208_1978;                                     \
        charset = CHARSET_FROM_ID (id);                                 \
      }                                                                 \
                                                                        \
    /* A pending single shift takes precedence over the planes.  */     \
    if (CODING_ISO_SINGLE_SHIFTING (coding))                            \
      {                                                                 \
        if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS)) \
          EMIT_TWO_BYTES ((c1) | 0x80, (c2) | 0x80);                    \
        else                                                            \
          EMIT_TWO_ASCII_BYTES ((c1) & 0x7F, (c2) & 0x7F);              \
        CODING_ISO_SINGLE_SHIFTING (coding) = 0;                        \
        break;                                                          \
      }                                                                 \
    if (id == CODING_ISO_INVOKED_CHARSET (coding, 0))                   \
      {                                                                 \
        EMIT_TWO_ASCII_BYTES ((c1) & 0x7F, (c2) & 0x7F);                \
        break;                                                          \
      }                                                                 \
    if (id == CODING_ISO_INVOKED_CHARSET (coding, 1))                   \
      {                                                                 \
        EMIT_TWO_BYTES ((c1) | 0x80, (c2) | 0x80);                      \
        break;                                                          \
      }                                                                 \
                                                                        \
    /* CHARSET is not invoked to either graphic plane yet: invoke it    \
       (designating it to a graphic register first if necessary),       \
       then loop to actually produce the character.  */                 \
    dst = encode_invocation_designation (charset, coding, dst,          \
                                         &produced_chars);              \
  } while (1)
3576
3577
/* Encode character C in CHARSET and produce its ISO-2022 bytes.
   The code obtained from ENCODE_CHAR is handed to the DIMENSION1
   macro when the dimension of CHARSET is 1; for any other dimension
   it is split into its `code >> 8' and `code & 0xFF' parts and handed
   to the DIMENSION2 macro.  CHARSET is evaluated more than once and
   is passed on to macros that may assign to it.  */
#define ENCODE_ISO_CHARACTER(charset, c)                                   \
  do {                                                                     \
    int code = ENCODE_CHAR ((charset),(c));                                \
                                                                           \
    if (CHARSET_DIMENSION (charset) != 1)                                  \
      {                                                                    \
        ENCODE_ISO_CHARACTER_DIMENSION2 ((charset), code >> 8,             \
                                         code & 0xFF);                     \
      }                                                                    \
    else                                                                   \
      {                                                                    \
        ENCODE_ISO_CHARACTER_DIMENSION1 ((charset), code);                 \
      }                                                                    \
  } while (0)
3587
3588
3589 /* Produce designation and invocation codes at a place pointed by DST
3590    to use CHARSET.  The element `spec.iso_2022' of *CODING is updated.
3591    Return new DST.  */
3592
3593 unsigned char *
3594 encode_invocation_designation (charset, coding, dst, p_nchars)
3595      struct charset *charset;
3596      struct coding_system *coding;
3597      unsigned char *dst;
3598      int *p_nchars;
3599 {
3600   int multibytep = coding->dst_multibyte;
3601   int produced_chars = *p_nchars;
3602   int reg;                      /* graphic register number */
3603   int id = CHARSET_ID (charset);
3604
3605   /* At first, check designations.  */
3606   for (reg = 0; reg < 4; reg++)
3607     if (id == CODING_ISO_DESIGNATION (coding, reg))
3608       break;
3609
3610   if (reg >= 4)
3611     {
3612       /* CHARSET is not yet designated to any graphic registers.  */
3613       /* At first check the requested designation.  */
3614       reg = CODING_ISO_REQUEST (coding, id);
3615       if (reg < 0)
3616         /* Since CHARSET requests no special designation, designate it
3617            to graphic register 0.  */
3618         reg = 0;
3619
3620       ENCODE_DESIGNATION (charset, reg, coding);
3621     }
3622
3623   if (CODING_ISO_INVOCATION (coding, 0) != reg
3624       && CODING_ISO_INVOCATION (coding, 1) != reg)
3625     {
3626       /* Since the graphic register REG is not invoked to any graphic
3627          planes, invoke it to graphic plane 0.  */
3628       switch (reg)
3629         {
3630         case 0:                 /* graphic register 0 */
3631           ENCODE_SHIFT_IN;
3632           break;
3633
3634         case 1:                 /* graphic register 1 */
3635           ENCODE_SHIFT_OUT;
3636           break;
3637
3638         case 2:                 /* graphic register 2 */
3639           if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT)
3640             ENCODE_SINGLE_SHIFT_2;
3641           else
3642             ENCODE_LOCKING_SHIFT_2;
3643           break;
3644
3645         case 3:                 /* graphic register 3 */
3646           if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT)
3647             ENCODE_SINGLE_SHIFT_3;
3648           else
3649             ENCODE_LOCKING_SHIFT_3;
3650           break;
3651         }
3652     }
3653
3654   *p_nchars = produced_chars;
3655   return dst;
3656 }
3657
3658 /* The following three macros produce codes for indicating direction
3659    of text.  */
/* Emit a control sequence introducer for `coding': the two-byte
   sequence ESC '[' when CODING_ISO_FLAG_SEVEN_BITS is set, otherwise
   the single byte ISO_CODE_CSI.

   The flag is tested with `&' because CODING_ISO_FLAGS is a set of
   flag bits; comparing the whole word with `==' would select the
   7-bit form only when no other flag happened to be set.  */
#define ENCODE_CONTROL_SEQUENCE_INTRODUCER                              \
  do {                                                                  \
    if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS)         \
      EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, '[');                         \
    else                                                                \
      EMIT_ONE_BYTE (ISO_CODE_CSI);                                     \
  } while (0)
3667
3668
/* Emit the sequence that starts right-to-left text: a control
   sequence introducer followed by '2' and ']'.

   ENCODE_CONTROL_SEQUENCE_INTRODUCER is an object-like macro, so it
   is used without an argument list; writing `(dst)' after it would
   leave a stray `(dst)' behind its `while (0)' and fail to compile.  */
#define ENCODE_DIRECTION_R2L()                  \
  do {                                          \
    ENCODE_CONTROL_SEQUENCE_INTRODUCER;         \
    EMIT_TWO_ASCII_BYTES ('2', ']');            \
  } while (0)
3674
3675
/* Emit the sequence that returns to left-to-right text: a control
   sequence introducer followed by '0' and ']'.  The ISO-2022 decoder
   in this file clears its right-to-left mode on that sequence.

   ENCODE_CONTROL_SEQUENCE_INTRODUCER is an object-like macro, so it
   is used without an argument list; writing `(dst)' after it would
   leave a stray `(dst)' behind its `while (0)' and fail to compile.  */
#define ENCODE_DIRECTION_L2R()                  \
  do {                                          \
    ENCODE_CONTROL_SEQUENCE_INTRODUCER;         \
    EMIT_TWO_ASCII_BYTES ('0', ']');            \
  } while (0)
3681
3682
/* Produce codes for designation and invocation to reset the graphic
   planes and registers to initial state.

   First, if CODING_ISO_INVOCATION (coding, 0) is not 0, emit a
   shift-in.  Then, for each of the four graphic registers whose
   initial charset is set (CODING_ISO_INITIAL >= 0) and differs from
   the charset currently designated to it, emit a designation of the
   initial charset.

   The macro expands in place and uses the local variable `coding' of
   the enclosing function; `reg' and `charset' are private to the
   expansion.  */
#define ENCODE_RESET_PLANE_AND_REGISTER()                               \
  do {                                                                  \
    int reg;                                                            \
    struct charset *charset;                                            \
                                                                        \
    if (CODING_ISO_INVOCATION (coding, 0) != 0)                         \
      ENCODE_SHIFT_IN;                                                  \
    for (reg = 0; reg < 4; reg++)                                       \
      if (CODING_ISO_INITIAL (coding, reg) >= 0                         \
          && (CODING_ISO_DESIGNATION (coding, reg)                      \
              != CODING_ISO_INITIAL (coding, reg)))                     \
        {                                                               \
          charset = CHARSET_FROM_ID (CODING_ISO_INITIAL (coding, reg)); \
          ENCODE_DESIGNATION (charset, reg, coding);                    \
        }                                                               \
  } while (0)
3701
3702
3703 /* Produce designation sequences of charsets in the line started from
3704    SRC to a place pointed by DST, and return updated DST.
3705
3706    If the current block ends before any end-of-line, we may fail to
3707    find all the necessary designations.  */
3708
3709 static unsigned char *
3710 encode_designation_at_bol (coding, charbuf, charbuf_end, dst)
3711      struct coding_system *coding;
3712      int *charbuf, *charbuf_end;
3713      unsigned char *dst;
3714 {
3715   struct charset *charset;
3716   /* Table of charsets to be designated to each graphic register.  */
3717   int r[4];
3718   int c, found = 0, reg;
3719   int produced_chars = 0;
3720   int multibytep = coding->dst_multibyte;
3721   Lisp_Object attrs;
3722   Lisp_Object charset_list;
3723
3724   attrs = CODING_ID_ATTRS (coding->id);
3725   charset_list = CODING_ATTR_CHARSET_LIST (attrs);
3726   if (EQ (charset_list, Qiso_2022))
3727     charset_list = Viso_2022_charset_list;
3728
3729   for (reg = 0; reg < 4; reg++)
3730     r[reg] = -1;
3731
3732   while (found < 4)
3733     {
3734       int id;
3735
3736       c = *charbuf++;
3737       if (c == '\n')
3738         break;
3739       charset = char_charset (c, charset_list, NULL);
3740       id = CHARSET_ID (charset);
3741       reg = CODING_ISO_REQUEST (coding, id);
3742       if (reg >= 0 && r[reg] < 0)
3743         {
3744           found++;
3745           r[reg] = id;
3746         }
3747     }
3748
3749   if (found)
3750     {
3751       for (reg = 0; reg < 4; reg++)
3752         if (r[reg] >= 0
3753             && CODING_ISO_DESIGNATION (coding, reg) != r[reg])
3754           ENCODE_DESIGNATION (CHARSET_FROM_ID (r[reg]), reg, coding);
3755     }
3756
3757   return dst;
3758 }
3759
3760 /* See the above "GENERAL NOTES on `encode_coding_XXX ()' functions".  */
3761
3762 static int
3763 encode_coding_iso_2022 (coding)
3764      struct coding_system *coding;
3765 {
3766   int multibytep = coding->dst_multibyte;
3767   int *charbuf = coding->charbuf;
3768   int *charbuf_end = charbuf + coding->charbuf_used;
3769   unsigned char *dst = coding->destination + coding->produced;
3770   unsigned char *dst_end = coding->destination + coding->dst_bytes;
3771   int safe_room = 16;
3772   int bol_designation
3773     = (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DESIGNATE_AT_BOL
3774        && CODING_ISO_BOL (coding));
3775   int produced_chars = 0;
3776   Lisp_Object attrs, eol_type, charset_list;
3777   int ascii_compatible;
3778   int c;
3779   int preferred_charset_id = -1;
3780
3781   CODING_GET_INFO (coding, attrs, charset_list);
3782   eol_type = CODING_ID_EOL_TYPE (coding->id);
3783   if (VECTORP (eol_type))
3784     eol_type = Qunix;
3785
3786   setup_iso_safe_charsets (attrs);
3787   /* Charset list may have been changed.  */
3788   charset_list = CODING_ATTR_CHARSET_LIST (attrs);              \
3789   coding->safe_charsets = (char *) SDATA (CODING_ATTR_SAFE_CHARSETS(attrs));
3790
3791   ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
3792
3793   while (charbuf < charbuf_end)
3794     {
3795       ASSURE_DESTINATION (safe_room);
3796
3797       if (bol_designation)
3798         {
3799           unsigned char *dst_prev = dst;
3800
3801           /* We have to produce designation sequences if any now.  */
3802           dst = encode_designation_at_bol (coding, charbuf, charbuf_end, dst);
3803           bol_designation = 0;
3804           /* We are sure that designation sequences are all ASCII bytes.  */
3805           produced_chars += dst - dst_prev;
3806         }
3807
3808       c = *charbuf++;
3809
3810       if (c < 0)
3811         {
3812           /* Handle an annotation.  */
3813           switch (*charbuf)
3814             {
3815             case CODING_ANNOTATE_COMPOSITION_MASK:
3816               /* Not yet implemented.  */
3817               break;
3818             case CODING_ANNOTATE_CHARSET_MASK:
3819               preferred_charset_id = charbuf[3];
3820               if (preferred_charset_id >= 0
3821                   && NILP (Fmemq (make_number (preferred_charset_id),
3822                                   charset_list)))
3823                 preferred_charset_id = -1;
3824               break;
3825             default:
3826               abort ();
3827             }
3828           charbuf += -c - 1;
3829           continue;
3830         }
3831
3832       /* Now encode the character C.  */
3833       if (c < 0x20 || c == 0x7F)
3834         {
3835           if (c == '\n'
3836               || (c == '\r' && EQ (eol_type, Qmac)))
3837             {
3838               if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_RESET_AT_EOL)
3839                 ENCODE_RESET_PLANE_AND_REGISTER ();
3840               if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_INIT_AT_BOL)
3841                 {
3842                   int i;
3843
3844                   for (i = 0; i < 4; i++)
3845                     CODING_ISO_DESIGNATION (coding, i)
3846                       = CODING_ISO_INITIAL (coding, i);
3847                 }
3848               bol_designation
3849                 = CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DESIGNATE_AT_BOL;
3850             }
3851           else if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_RESET_AT_CNTL)
3852             ENCODE_RESET_PLANE_AND_REGISTER ();
3853           EMIT_ONE_ASCII_BYTE (c);
3854         }
3855       else if (ASCII_CHAR_P (c))
3856         {
3857           if (ascii_compatible)
3858             EMIT_ONE_ASCII_BYTE (c);
3859           else
3860             {
3861               struct charset *charset = CHARSET_FROM_ID (charset_ascii);
3862               ENCODE_ISO_CHARACTER (charset, c);
3863             }
3864         }
3865       else if (CHAR_BYTE8_P (c))
3866         {
3867           c = CHAR_TO_BYTE8 (c);
3868           EMIT_ONE_BYTE (c);
3869         }
3870       else
3871         {
3872           struct charset *charset;
3873
3874           if (preferred_charset_id >= 0)
3875             {
3876               charset = CHARSET_FROM_ID (preferred_charset_id);
3877               if (! CHAR_CHARSET_P (c, charset))
3878                 charset = char_charset (c, charset_list, NULL);
3879             }
3880           else
3881             charset = char_charset (c, charset_list, NULL);
3882           if (!charset)
3883             {
3884               if (coding->mode & CODING_MODE_SAFE_ENCODING)
3885                 {
3886                   c = CODING_INHIBIT_CHARACTER_SUBSTITUTION;
3887                   charset = CHARSET_FROM_ID (charset_ascii);
3888                 }
3889               else
3890                 {
3891                   c = coding->default_char;
3892                   charset = char_charset (c, charset_list, NULL);
3893                 }
3894             }
3895           ENCODE_ISO_CHARACTER (charset, c);
3896         }
3897     }
3898
3899   if (coding->mode & CODING_MODE_LAST_BLOCK
3900       && CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_RESET_AT_EOL)
3901     {
3902       ASSURE_DESTINATION (safe_room);
3903       ENCODE_RESET_PLANE_AND_REGISTER ();
3904     }
3905   record_conversion_result (coding, CODING_RESULT_SUCCESS);
3906   CODING_ISO_BOL (coding) = bol_designation;
3907   coding->produced_char += produced_chars;
3908   coding->produced = dst - coding->destination;
3909   return 0;
3910 }
3911
3912 \f
/*** 8. Shift-JIS and BIG5 handlers ***/
3914
/* Although SJIS and BIG5 are not ISO coding systems, they are used
   quite widely.  So, for the moment, Emacs supports them in the bare
   C code.  But, in the future, they may be supported only by CCL.  */
3918
/* SJIS is a coding system encoding three character sets: ASCII, right
   half of JISX0201-Kana, and JISX0208.  An ASCII character is encoded
   as is.  A character of charset katakana-jisx0201 is encoded by
   "position-code + 0x80".  A character of charset japanese-jisx0208
   is encoded in two bytes; its two position-codes are divided and
   shifted so that they fit in the ranges below.

   --- CODE RANGE of SJIS ---
   (character set)      (range)
   ASCII                0x00 .. 0x7F
   KATAKANA-JISX0201    0xA1 .. 0xDF
   JISX0208 (1st byte)  0x81 .. 0x9F and 0xE0 .. 0xEF
            (2nd byte)  0x40 .. 0x7E and 0x80 .. 0xFC
   -------------------------------

*/
3935
/* BIG5 is a coding system encoding two character sets: ASCII and
   Big5.  An ASCII character is encoded as is.  Big5 is a two-byte
   character set, and each of its characters is encoded in two bytes.

   --- CODE RANGE of BIG5 ---
   (character set)      (range)
   ASCII                0x00 .. 0x7F
   Big5 (1st byte)      0xA1 .. 0xFE
        (2nd byte)      0x40 .. 0x7E and 0xA1 .. 0xFE
   --------------------------

  */
3948
3949 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
3950    Check if a text is encoded in SJIS.  If it is, return
3951    CATEGORY_MASK_SJIS, else return 0.  */
3952
3953 static int
3954 detect_coding_sjis (coding, detect_info)
3955      struct coding_system *coding;
3956      struct coding_detection_info *detect_info;
3957 {
3958   const unsigned char *src = coding->source, *src_base;
3959   const unsigned char *src_end = coding->source + coding->src_bytes;
3960   int multibytep = coding->src_multibyte;
3961   int consumed_chars = 0;
3962   int found = 0;
3963   int c;
3964
3965   detect_info->checked |= CATEGORY_MASK_SJIS;
3966   /* A coding system of this category is always ASCII compatible.  */
3967   src += coding->head_ascii;
3968
3969   while (1)
3970     {
3971       src_base = src;
3972       ONE_MORE_BYTE (c);
3973       if (c < 0x80)
3974         continue;
3975       if ((c >= 0x81 && c <= 0x9F) || (c >= 0xE0 && c <= 0xEF))
3976         {
3977           ONE_MORE_BYTE (c);
3978           if (c < 0x40 || c == 0x7F || c > 0xFC)
3979             break;
3980           found = CATEGORY_MASK_SJIS;
3981         }
3982       else if (c >= 0xA0 && c < 0xE0)
3983         found = CATEGORY_MASK_SJIS;
3984       else
3985         break;
3986     }
3987   detect_info->rejected |= CATEGORY_MASK_SJIS;
3988   return 0;
3989
3990  no_more_source:
3991   if (src_base < src && coding->mode & CODING_MODE_LAST_BLOCK)
3992     {
3993       detect_info->rejected |= CATEGORY_MASK_SJIS;
3994       return 0;
3995     }
3996   detect_info->found |= found;
3997   return 1;
3998 }
3999
4000 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
4001    Check if a text is encoded in BIG5.  If it is, return
4002    CATEGORY_MASK_BIG5, else return 0.  */
4003
4004 static int
4005 detect_coding_big5 (coding, detect_info)
4006      struct coding_system *coding;
4007      struct coding_detection_info *detect_info;
4008 {
4009   const unsigned char *src = coding->source, *src_base;
4010   const unsigned char *src_end = coding->source + coding->src_bytes;
4011   int multibytep = coding->src_multibyte;
4012   int consumed_chars = 0;
4013   int found = 0;
4014   int c;
4015
4016   detect_info->checked |= CATEGORY_MASK_BIG5;
4017   /* A coding system of this category is always ASCII compatible.  */
4018   src += coding->head_ascii;
4019
4020   while (1)
4021     {
4022       src_base = src;
4023       ONE_MORE_BYTE (c);
4024       if (c < 0x80)
4025         continue;
4026       if (c >= 0xA1)
4027         {
4028           ONE_MORE_BYTE (c);
4029           if (c < 0x40 || (c >= 0x7F && c <= 0xA0))
4030             return 0;
4031           found = CATEGORY_MASK_BIG5;
4032         }
4033       else
4034         break;
4035     }
4036   detect_info->rejected |= CATEGORY_MASK_BIG5;
4037   return 0;
4038
4039  no_more_source:
4040   if (src_base < src && coding->mode & CODING_MODE_LAST_BLOCK)
4041     {
4042       detect_info->rejected |= CATEGORY_MASK_BIG5;
4043       return 0;
4044     }
4045   detect_info->found |= found;
4046   return 1;
4047 }
4048
4049 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".
4050    If SJIS_P is 1, decode SJIS text, else decode BIG5 test.  */
4051
4052 static void
4053 decode_coding_sjis (coding)
4054      struct coding_system *coding;
4055 {
4056   const unsigned char *src = coding->source + coding->consumed;
4057   const unsigned char *src_end = coding->source + coding->src_bytes;
4058   const unsigned char *src_base;
4059   int *charbuf = coding->charbuf + coding->charbuf_used;
4060   int *charbuf_end
4061     = coding->charbuf + coding->charbuf_size - MAX_ANNOTATION_LENGTH;
4062   int consumed_chars = 0, consumed_chars_base;
4063   int multibytep = coding->src_multibyte;
4064   struct charset *charset_roman, *charset_kanji, *charset_kana;
4065   struct charset *charset_kanji2;
4066   Lisp_Object attrs, charset_list, val;
4067   int char_offset = coding->produced_char;
4068   int last_offset = char_offset;
4069   int last_id = charset_ascii;
4070
4071   CODING_GET_INFO (coding, attrs, charset_list);
4072
4073   val = charset_list;
4074   charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
4075   charset_kana = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
4076   charset_kanji = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
4077   charset_kanji2 = NILP (val) ? NULL : CHARSET_FROM_ID (XINT (XCAR (val)));
4078
4079   while (1)
4080     {
4081       int c, c1;
4082       struct charset *charset;
4083
4084       src_base = src;
4085       consumed_chars_base = consumed_chars;
4086
4087       if (charbuf >= charbuf_end)
4088         break;
4089
4090       ONE_MORE_BYTE (c);
4091       if (c < 0)
4092         goto invalid_code;
4093       if (c < 0x80)
4094         charset = charset_roman;
4095       else if (c == 0x80 || c == 0xA0)
4096         goto invalid_code;
4097       else if (c >= 0xA1 && c <= 0xDF)
4098         {
4099           /* SJIS -> JISX0201-Kana */
4100           c &= 0x7F;
4101           charset = charset_kana;
4102         }
4103       else if (c <= 0xEF)
4104         {
4105           /* SJIS -> JISX0208 */
4106           ONE_MORE_BYTE (c1);
4107           if (c1 < 0x40 || c1 == 0x7F || c1 > 0xFC)
4108             goto invalid_code;
4109           c = (c << 8) | c1;
4110           SJIS_TO_JIS (c);
4111           charset = charset_kanji;
4112         }
4113       else if (c <= 0xFC && charset_kanji2)
4114         {
4115           /* SJIS -> JISX0213-2 */
4116           ONE_MORE_BYTE (c1);
4117           if (c1 < 0x40 || c1 == 0x7F || c1 > 0xFC)
4118             goto invalid_code;
4119           c = (c << 8) | c1;
4120           SJIS_TO_JIS2 (c);
4121           charset = charset_kanji2;
4122         }
4123       else
4124         goto invalid_code;
4125       if (charset->id != charset_ascii
4126           && last_id != charset->id)
4127         {
4128           if (last_id != charset_ascii)
4129             ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
4130           last_id = charset->id;
4131           last_offset = char_offset;
4132         }
4133       CODING_DECODE_CHAR (coding, src, src_base, src_end, charset, c, c);
4134       *charbuf++ = c;
4135       char_offset++;
4136       continue;
4137
4138     invalid_code:
4139       src = src_base;
4140       consumed_chars = consumed_chars_base;
4141       ONE_MORE_BYTE (c);
4142       *charbuf++ = c < 0 ? -c : BYTE8_TO_CHAR (c);
4143       char_offset++;
4144       coding->errors++;
4145     }
4146
4147  no_more_source:
4148   if (last_id != charset_ascii)
4149     ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
4150   coding->consumed_char += consumed_chars_base;
4151   coding->consumed = src_base - coding->source;
4152   coding->charbuf_used = charbuf - coding->charbuf;
4153 }
4154
4155 static void
4156 decode_coding_big5 (coding)
4157      struct coding_system *coding;
4158 {
4159   const unsigned char *src = coding->source + coding->consumed;
4160   const unsigned char *src_end = coding->source + coding->src_bytes;
4161   const unsigned char *src_base;
4162   int *charbuf = coding->charbuf + coding->charbuf_used;
4163   int *charbuf_end
4164     = coding->charbuf + coding->charbuf_size - MAX_ANNOTATION_LENGTH;
4165   int consumed_chars = 0, consumed_chars_base;
4166   int multibytep = coding->src_multibyte;
4167   struct charset *charset_roman, *charset_big5;
4168   Lisp_Object attrs, charset_list, val;
4169   int char_offset = coding->produced_char;
4170   int last_offset = char_offset;
4171   int last_id = charset_ascii;
4172
4173   CODING_GET_INFO (coding, attrs, charset_list);
4174   val = charset_list;
4175   charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
4176   charset_big5 = CHARSET_FROM_ID (XINT (XCAR (val)));
4177
4178   while (1)
4179     {
4180       int c, c1;
4181       struct charset *charset;
4182
4183       src_base = src;
4184       consumed_chars_base = consumed_chars;
4185
4186       if (charbuf >= charbuf_end)
4187         break;
4188
4189       ONE_MORE_BYTE (c);
4190
4191       if (c < 0)
4192         goto invalid_code;
4193       if (c < 0x80)
4194         charset = charset_roman;
4195       else
4196         {
4197           /* BIG5 -> Big5 */
4198           if (c < 0xA1 || c > 0xFE)
4199             goto invalid_code;
4200           ONE_MORE_BYTE (c1);
4201           if (c1 < 0x40 || (c1 > 0x7E && c1 < 0xA1) || c1 > 0xFE)
4202             goto invalid_code;
4203           c = c << 8 | c1;
4204           charset = charset_big5;
4205         }
4206       if (charset->id != charset_ascii
4207           && last_id != charset->id)
4208         {
4209           if (last_id != charset_ascii)
4210             ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
4211           last_id = charset->id;
4212           last_offset = char_offset;
4213         }
4214       CODING_DECODE_CHAR (coding, src, src_base, src_end, charset, c, c);
4215       *charbuf++ = c;
4216       char_offset++;
4217       continue;
4218
4219     invalid_code:
4220       src = src_base;
4221       consumed_chars = consumed_chars_base;
4222       ONE_MORE_BYTE (c);
4223       *charbuf++ = c < 0 ? -c : BYTE8_TO_CHAR (c);
4224       char_offset++;
4225       coding->errors++;
4226     }
4227
4228  no_more_source:
4229   if (last_id != charset_ascii)
4230     ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
4231   coding->consumed_char += consumed_chars_base;
4232   coding->consumed = src_base - coding->source;
4233   coding->charbuf_used = charbuf - coding->charbuf;
4234 }
4235
4236 /* See the above "GENERAL NOTES on `encode_coding_XXX ()' functions".
4237    This function can encode charsets `ascii', `katakana-jisx0201',
4238    `japanese-jisx0208', `chinese-big5-1', and `chinese-big5-2'.  We
4239    are sure that all these charsets are registered as official charset
4240    (i.e. do not have extended leading-codes).  Characters of other
4241    charsets are produced without any encoding.  If SJIS_P is 1, encode
4242    SJIS text, else encode BIG5 text.  */
4243
4244 static int
4245 encode_coding_sjis (coding)
4246      struct coding_system *coding;
4247 {
4248   int multibytep = coding->dst_multibyte;
4249   int *charbuf = coding->charbuf;
4250   int *charbuf_end = charbuf + coding->charbuf_used;
4251   unsigned char *dst = coding->destination + coding->produced;
4252   unsigned char *dst_end = coding->destination + coding->dst_bytes;
4253   int safe_room = 4;
4254   int produced_chars = 0;
4255   Lisp_Object attrs, charset_list, val;
4256   int ascii_compatible;
4257   struct charset *charset_roman, *charset_kanji, *charset_kana;
4258   struct charset *charset_kanji2;
4259   int c;
4260
4261   CODING_GET_INFO (coding, attrs, charset_list);
4262   val = charset_list;
4263   charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
4264   charset_kana = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
4265   charset_kanji = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
4266   charset_kanji2 = NILP (val) ? NULL : CHARSET_FROM_ID (XINT (XCAR (val)));
4267
4268   ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
4269
4270   while (charbuf < charbuf_end)
4271     {
4272       ASSURE_DESTINATION (safe_room);
4273       c = *charbuf++;
4274       /* Now encode the character C.  */
4275       if (ASCII_CHAR_P (c) && ascii_compatible)
4276         EMIT_ONE_ASCII_BYTE (c);
4277       else if (CHAR_BYTE8_P (c))
4278         {
4279           c = CHAR_TO_BYTE8 (c);
4280           EMIT_ONE_BYTE (c);
4281         }
4282       else
4283         {
4284           unsigned code;
4285           struct charset *charset = char_charset (c, charset_list, &code);
4286
4287           if (!charset)
4288             {
4289               if (coding->mode & CODING_MODE_SAFE_ENCODING)
4290                 {
4291                   code = CODING_INHIBIT_CHARACTER_SUBSTITUTION;
4292                   charset = CHARSET_FROM_ID (charset_ascii);
4293                 }
4294               else
4295                 {
4296                   c = coding->default_char;
4297                   charset = char_charset (c, charset_list, &code);
4298                 }
4299             }
4300           if (code == CHARSET_INVALID_CODE (charset))
4301             abort ();
4302           if (charset == charset_kanji)
4303             {
4304               int c1, c2;
4305               JIS_TO_SJIS (code);
4306               c1 = code >> 8, c2 = code & 0xFF;
4307               EMIT_TWO_BYTES (c1, c2);
4308             }
4309           else if (charset == charset_kana)
4310             EMIT_ONE_BYTE (code | 0x80);
4311           else if (charset_kanji2 && charset == charset_kanji2)
4312             {
4313               int c1, c2;
4314
4315               c1 = code >> 8;
4316               if (c1 == 0x21 || (c1 >= 0x23 && c1 < 0x25)
4317                   || (c1 >= 0x2C && c1 <= 0x2F) || c1 >= 0x6E)
4318                 {
4319                   JIS_TO_SJIS2 (code);
4320                   c1 = code >> 8, c2 = code & 0xFF;
4321                   EMIT_TWO_BYTES (c1, c2);
4322                 }
4323               else
4324                 EMIT_ONE_ASCII_BYTE (code & 0x7F);
4325             }
4326           else
4327             EMIT_ONE_ASCII_BYTE (code & 0x7F);
4328         }
4329     }
4330   record_conversion_result (coding, CODING_RESULT_SUCCESS);
4331   coding->produced_char += produced_chars;
4332   coding->produced = dst - coding->destination;
4333   return 0;
4334 }
4335
4336 static int
4337 encode_coding_big5 (coding)
4338      struct coding_system *coding;
4339 {
4340   int multibytep = coding->dst_multibyte;
4341   int *charbuf = coding->charbuf;
4342   int *charbuf_end = charbuf + coding->charbuf_used;
4343   unsigned char *dst = coding->destination + coding->produced;
4344   unsigned char *dst_end = coding->destination + coding->dst_bytes;
4345   int safe_room = 4;
4346   int produced_chars = 0;
4347   Lisp_Object attrs, charset_list, val;
4348   int ascii_compatible;
4349   struct charset *charset_roman, *charset_big5;
4350   int c;
4351
4352   CODING_GET_INFO (coding, attrs, charset_list);
4353   val = charset_list;
4354   charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
4355   charset_big5 = CHARSET_FROM_ID (XINT (XCAR (val)));
4356   ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
4357
4358   while (charbuf < charbuf_end)
4359     {
4360       ASSURE_DESTINATION (safe_room);
4361       c = *charbuf++;
4362       /* Now encode the character C.  */
4363       if (ASCII_CHAR_P (c) && ascii_compatible)
4364         EMIT_ONE_ASCII_BYTE (c);
4365       else if (CHAR_BYTE8_P (c))
4366         {
4367           c = CHAR_TO_BYTE8 (c);
4368           EMIT_ONE_BYTE (c);
4369         }
4370       else
4371         {
4372           unsigned code;
4373           struct charset *charset = char_charset (c, charset_list, &code);
4374
4375           if (! charset)
4376             {
4377               if (coding->mode & CODING_MODE_SAFE_ENCODING)
4378                 {
4379                   code = CODING_INHIBIT_CHARACTER_SUBSTITUTION;
4380                   charset = CHARSET_FROM_ID (charset_ascii);
4381                 }
4382               else
4383                 {
4384                   c = coding->default_char;
4385                   charset = char_charset (c, charset_list, &code);
4386                 }
4387             }
4388           if (code == CHARSET_INVALID_CODE (charset))
4389             abort ();
4390           if (charset == charset_big5)
4391             {
4392               int c1, c2;
4393
4394               c1 = code >> 8, c2 = code & 0xFF;
4395               EMIT_TWO_BYTES (c1, c2);
4396             }
4397           else
4398             EMIT_ONE_ASCII_BYTE (code & 0x7F);
4399         }
4400     }
4401   record_conversion_result (coding, CODING_RESULT_SUCCESS);
4402   coding->produced_char += produced_chars;
4403   coding->produced = dst - coding->destination;
4404   return 0;
4405 }
4406
4407 \f
/*** 9. CCL handlers ***/
4409
4410 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
4411    Check if a text is encoded in a coding system of which
4412    encoder/decoder are written in CCL program.  If it is, return
4413    CATEGORY_MASK_CCL, else return 0.  */
4414
4415 static int
4416 detect_coding_ccl (coding, detect_info)
4417      struct coding_system *coding;
4418      struct coding_detection_info *detect_info;
4419 {
4420   const unsigned char *src = coding->source, *src_base;
4421   const unsigned char *src_end = coding->source + coding->src_bytes;
4422   int multibytep = coding->src_multibyte;
4423   int consumed_chars = 0;
4424   int found = 0;
4425   unsigned char *valids;
4426   int head_ascii = coding->head_ascii;
4427   Lisp_Object attrs;
4428
4429   detect_info->checked |= CATEGORY_MASK_CCL;
4430
4431   coding = &coding_categories[coding_category_ccl];
4432   valids = CODING_CCL_VALIDS (coding);
4433   attrs = CODING_ID_ATTRS (coding->id);
4434   if (! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
4435     src += head_ascii;
4436
4437   while (1)
4438     {
4439       int c;
4440
4441       src_base = src;
4442       ONE_MORE_BYTE (c);
4443       if (c < 0 || ! valids[c])
4444         break;
4445       if ((valids[c] > 1))
4446         found = CATEGORY_MASK_CCL;
4447     }
4448   detect_info->rejected |= CATEGORY_MASK_CCL;
4449   return 0;
4450
4451  no_more_source:
4452   detect_info->found |= found;
4453   return 1;
4454 }
4455
4456 static void
4457 decode_coding_ccl (coding)
4458      struct coding_system *coding;
4459 {
4460   const unsigned char *src = coding->source + coding->consumed;
4461   const unsigned char *src_end = coding->source + coding->src_bytes;
4462   int *charbuf = coding->charbuf + coding->charbuf_used;
4463   int *charbuf_end = coding->charbuf + coding->charbuf_size;
4464   int consumed_chars = 0;
4465   int multibytep = coding->src_multibyte;
4466   struct ccl_program ccl;
4467   int source_charbuf[1024];
4468   int source_byteidx[1024];
4469   Lisp_Object attrs, charset_list;
4470
4471   CODING_GET_INFO (coding, attrs, charset_list);
4472   setup_ccl_program (&ccl, CODING_CCL_DECODER (coding));
4473
4474   while (src < src_end)
4475     {
4476       const unsigned char *p = src;
4477       int *source, *source_end;
4478       int i = 0;
4479
4480       if (multibytep)
4481         while (i < 1024 && p < src_end)
4482           {
4483             source_byteidx[i] = p - src;
4484             source_charbuf[i++] = STRING_CHAR_ADVANCE (p);
4485           }
4486       else
4487         while (i < 1024 && p < src_end)
4488           source_charbuf[i++] = *p++;
4489
4490       if (p == src_end && coding->mode & CODING_MODE_LAST_BLOCK)
4491         ccl.last_block = 1;
4492
4493       source = source_charbuf;
4494       source_end = source + i;
4495       while (source < source_end)
4496         {
4497           ccl_driver (&ccl, source, charbuf,
4498                       source_end - source, charbuf_end - charbuf,
4499                       charset_list);
4500           source += ccl.consumed;
4501           charbuf += ccl.produced;
4502           if (ccl.status != CCL_STAT_SUSPEND_BY_DST)
4503             break;
4504         }
4505       if (source < source_end)
4506         src += source_byteidx[source - source_charbuf];
4507       else
4508         src = p;
4509       consumed_chars += source - source_charbuf;
4510
4511       if (ccl.status != CCL_STAT_SUSPEND_BY_SRC
4512           && ccl.status != CODING_RESULT_INSUFFICIENT_SRC)
4513         break;
4514     }
4515
4516   switch (ccl.status)
4517     {
4518     case CCL_STAT_SUSPEND_BY_SRC:
4519       record_conversion_result (coding, CODING_RESULT_INSUFFICIENT_SRC);
4520       break;
4521     case CCL_STAT_SUSPEND_BY_DST:
4522       break;
4523     case CCL_STAT_QUIT:
4524     case CCL_STAT_INVALID_CMD:
4525       record_conversion_result (coding, CODING_RESULT_INTERRUPT);
4526       break;
4527     default:
4528       record_conversion_result (coding, CODING_RESULT_SUCCESS);
4529       break;
4530     }
4531   coding->consumed_char += consumed_chars;
4532   coding->consumed = src - coding->source;
4533   coding->charbuf_used = charbuf - coding->charbuf;
4534 }
4535
     /* Encode the characters in CODING->charbuf by running the CCL
        encoder program of CODING, and store the low 8 bits of each
        value the program produces at CODING->destination.  The final
        CCL status is translated into a conversion result, and
        CODING->produced and CODING->produced_char are updated.  Always
        return 0.  */
4536 static int
4537 encode_coding_ccl (coding)
4538      struct coding_system *coding;
4539 {
4540   struct ccl_program ccl;
4541   int multibytep = coding->dst_multibyte;
4542   int *charbuf = coding->charbuf;
4543   int *charbuf_end = charbuf + coding->charbuf_used;
4544   unsigned char *dst = coding->destination + coding->produced;
4545   unsigned char *dst_end = coding->destination + coding->dst_bytes;
       /* The conversion loop below stops once fewer than two bytes are
          free in the destination.  */
4546   unsigned char *adjusted_dst_end = dst_end - 1;
       /* Scratch output area for a single ccl_driver call.  */
4547   int destination_charbuf[1024];
4548   int i, produced_chars = 0;
4549   Lisp_Object attrs, charset_list;
4550
4551   CODING_GET_INFO (coding, attrs, charset_list);
4552   setup_ccl_program (&ccl, CODING_CCL_ENCODER (coding));
4553
4554   ccl.last_block = coding->mode & CODING_MODE_LAST_BLOCK;
4555   ccl.dst_multibyte = coding->dst_multibyte;
4556
4557   while (charbuf < charbuf_end && dst < adjusted_dst_end)
4558     {
           /* Never ask for more output than destination_charbuf holds.  */
4559       int dst_bytes = dst_end - dst;
4560       if (dst_bytes > 1024)
4561         dst_bytes = 1024;
4562
4563       ccl_driver (&ccl, charbuf, destination_charbuf,
4564                   charbuf_end - charbuf, dst_bytes, charset_list);
4565       charbuf += ccl.consumed;
           /* NOTE(review): EMIT_ONE_BYTE is defined elsewhere; it
              presumably advances DST and counts produced_chars itself,
              which the unibyte branch does by hand -- confirm.  */
4566       if (multibytep)
4567         for (i = 0; i < ccl.produced; i++)
4568           EMIT_ONE_BYTE (destination_charbuf[i] & 0xFF);
4569       else
4570         {
4571           for (i = 0; i < ccl.produced; i++)
4572             *dst++ = destination_charbuf[i] & 0xFF;
4573           produced_chars += ccl.produced;
4574         }
4575     }
4576
       /* Map the status of the last ccl_driver call to a conversion
          result; any status not listed counts as success.  */
4577   switch (ccl.status)
4578     {
4579     case CCL_STAT_SUSPEND_BY_SRC:
4580       record_conversion_result (coding, CODING_RESULT_INSUFFICIENT_SRC);
4581       break;
4582     case CCL_STAT_SUSPEND_BY_DST:
4583       record_conversion_result (coding, CODING_RESULT_INSUFFICIENT_DST);
4584       break;
4585     case CCL_STAT_QUIT:
4586     case CCL_STAT_INVALID_CMD:
4587       record_conversion_result (coding, CODING_RESULT_INTERRUPT);
4588       break;
4589     default:
4590       record_conversion_result (coding, CODING_RESULT_SUCCESS);
4591       break;
4592     }
4593
4594   coding->produced_char += produced_chars;
4595   coding->produced = dst - coding->destination;
4596   return 0;
4597 }
4598
4599
4600 \f
4601 /*** 10, 11. no-conversion handlers ***/
4602
4603 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".  */
4604
     /* Decoder for raw text.  No bytes are converted here: the function
        only sets CODING->chars_at_source, resets CODING->consumed and
        CODING->consumed_char to 0, and records a successful result.  */
4605 static void
4606 decode_coding_raw_text (coding)
4607      struct coding_system *coding;
4608 {
       /* NOTE(review): presumably tells the caller to take the
          characters directly from the source -- confirm against the
          users of chars_at_source.  */
4609   coding->chars_at_source = 1;
4610   coding->consumed_char = 0;
4611   coding->consumed = 0;
4612   record_conversion_result (coding, CODING_RESULT_SUCCESS);
4613 }
4614
     /* Encode the characters in CODING->charbuf as raw text and store
        the result at CODING->destination, starting at offset
        CODING->produced.  There are four cases, selected by
        CODING->dst_multibyte and CODING->src_multibyte:

        multibyte destination, multibyte source: ASCII characters and
          eight-bit characters (converted with CHAR_TO_BYTE8) are
          emitted as one byte; any other character is expanded with
          CHAR_STRING_ADVANCE and emitted byte by byte.
        multibyte destination, unibyte source: every element is emitted
          with EMIT_ONE_BYTE.
        unibyte destination, multibyte source: as the first case, but
          bytes are stored directly and one character is counted per
          charbuf element.
        unibyte destination, unibyte source: the elements are copied.

        A successful result is recorded, CODING->produced_char and
        CODING->produced are updated, and 0 is returned.  */
4615 static int
4616 encode_coding_raw_text (coding)
4617      struct coding_system *coding;
4618 {
4619   int multibytep = coding->dst_multibyte;
4620   int *charbuf = coding->charbuf;
4621   int *charbuf_end = coding->charbuf + coding->charbuf_used;
4622   unsigned char *dst = coding->destination + coding->produced;
4623   unsigned char *dst_end = coding->destination + coding->dst_bytes;
4624   int produced_chars = 0;
4625   int c;
4626
       /* NOTE(review): ASSURE_DESTINATION and the EMIT_XXX macros are
          defined elsewhere; they presumably operate on the locals DST,
          DST_END, MULTIBYTEP and PRODUCED_CHARS -- confirm before
          renaming any of them.  */
4627   if (multibytep)
4628     {
4629       int safe_room = MAX_MULTIBYTE_LENGTH * 2;
4630
4631       if (coding->src_multibyte)
4632         while (charbuf < charbuf_end)
4633           {
4634             ASSURE_DESTINATION (safe_room);
4635             c = *charbuf++;
4636             if (ASCII_CHAR_P (c))
4637               EMIT_ONE_ASCII_BYTE (c);
4638             else if (CHAR_BYTE8_P (c))
4639               {
4640                 c = CHAR_TO_BYTE8 (c);
4641                 EMIT_ONE_BYTE (c);
4642               }
4643             else
4644               {
4645                 unsigned char str[MAX_MULTIBYTE_LENGTH], *p0 = str, *p1 = str;
4646
4647                 CHAR_STRING_ADVANCE (c, p1);
4648                 while (p0 < p1)
4649                   {
4650                     EMIT_ONE_BYTE (*p0);
4651                     p0++;
4652                   }
4653               }
4654           }
4655       else
4656         while (charbuf < charbuf_end)
4657           {
4658             ASSURE_DESTINATION (safe_room);
4659             c = *charbuf++;
4660             EMIT_ONE_BYTE (c);
4661           }
4662     }
4663   else
4664     {
4665       if (coding->src_multibyte)
4666         {
4667           int safe_room = MAX_MULTIBYTE_LENGTH;
4668
4669           while (charbuf < charbuf_end)
4670             {
4671               ASSURE_DESTINATION (safe_room);
4672               c = *charbuf++;
4673               if (ASCII_CHAR_P (c))
4674                 *dst++ = c;
4675               else if (CHAR_BYTE8_P (c))
4676                 *dst++ = CHAR_TO_BYTE8 (c);
4677               else
4678                 CHAR_STRING_ADVANCE (c, dst);
4679               produced_chars++;
4680             }
4681         }
4682       else
4683         {
4684           ASSURE_DESTINATION (charbuf_end - charbuf);
4685           while (charbuf < charbuf_end && dst < dst_end)
4686             *dst++ = *charbuf++;
               /* One character per copied byte.  Measure from the
                  offset at which this call started writing
                  (CODING->produced); measuring from the end of the
                  destination would give a count that is never
                  positive.  */
4687           produced_chars = dst - (coding->destination + coding->produced);
4688         }
4689     }
4690   record_conversion_result (coding, CODING_RESULT_SUCCESS);
4691   coding->produced_char += produced_chars;
4692   coding->produced = dst - coding->destination;
4693   return 0;
4694 }
4695
4696 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
4697    Check if a text is encoded in a charset-based coding system.  If it
4698    is, return 1, else return 0.
        The text is taken from CODING, but the table of valid bytes comes
        from the coding system registered for coding_category_charset.
        A byte whose entry in that table is nil rejects the category.
        CATEGORY_MASK_CHARSET is reported as found only when at least
        one valid byte >= 0x80 was seen before the source ran out.  */
4699
4700 static int
4701 detect_coding_charset (coding, detect_info)
4702      struct coding_system *coding;
4703      struct coding_detection_info *detect_info;
4704 {
4705   const unsigned char *src = coding->source, *src_base;
4706   const unsigned char *src_end = coding->source + coding->src_bytes;
4707   int multibytep = coding->src_multibyte;
4708   int consumed_chars = 0;
4709   Lisp_Object attrs, valids;
4710   int found = 0;
       /* Length of the leading ASCII run of the text being examined.
          It must be read before CODING is redirected below: the
          category entry has its own head_ascii, which
          setup_coding_system initializes to -1.  */
       int head_ascii = coding->head_ascii;
4711
4712   detect_info->checked |= CATEGORY_MASK_CHARSET;
4713
4714   coding = &coding_categories[coding_category_charset];
4715   attrs = CODING_ID_ATTRS (coding->id);
4716   valids = AREF (attrs, coding_attr_charset_valids);
4717
       /* Skip the ASCII prefix; never move SRC before the source.  */
4718   if (head_ascii > 0 && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
4719     src += head_ascii;
4720
       /* NOTE(review): ONE_MORE_BYTE is defined elsewhere; it
          presumably jumps to no_more_source when SRC reaches SRC_END
          -- confirm.  */
4721   while (1)
4722     {
4723       int c;
4724
4725       src_base = src;
4726       ONE_MORE_BYTE (c);
4727       if (c < 0)
4728         continue;
4729       if (NILP (AREF (valids, c)))
4730         break;
4731       if (c >= 0x80)
4732         found = CATEGORY_MASK_CHARSET;
4733     }
4734   detect_info->rejected |= CATEGORY_MASK_CHARSET;
4735   return 0;
4736
4737  no_more_source:
4738   detect_info->found |= found;
4739   return 1;
4740 }
4741
     /* Decode the bytes of CODING->source, starting at offset
        CODING->consumed, with the charsets of a charset-based coding
        system, and append the resulting characters to CODING->charbuf.
        The first byte of each code selects, through the
        coding_attr_charset_valids table, either nothing (invalid), a
        single charset ID, or a list of candidate charset IDs.  An
        invalid code contributes its first byte as one character and
        increments CODING->errors.  On return CODING->consumed_char,
        CODING->consumed and CODING->charbuf_used are updated.  */
4742 static void
4743 decode_coding_charset (coding)
4744      struct coding_system *coding;
4745 {
4746   const unsigned char *src = coding->source + coding->consumed;
4747   const unsigned char *src_end = coding->source + coding->src_bytes;
4748   const unsigned char *src_base;
4749   int *charbuf = coding->charbuf + coding->charbuf_used;
       /* Keep MAX_ANNOTATION_LENGTH elements free at the end of the
          buffer.  */
4750   int *charbuf_end
4751     = coding->charbuf + coding->charbuf_size - MAX_ANNOTATION_LENGTH;
4752   int consumed_chars = 0, consumed_chars_base;
4753   int multibytep = coding->src_multibyte;
4754   Lisp_Object attrs, charset_list, valids;
       /* CHAR_OFFSET counts decoded characters; LAST_ID and LAST_OFFSET
          describe the current run of characters of one non-ASCII
          charset.  */
4755   int char_offset = coding->produced_char;
4756   int last_offset = char_offset;
4757   int last_id = charset_ascii;
4758
4759   CODING_GET_INFO (coding, attrs, charset_list);
4760   valids = AREF (attrs, coding_attr_charset_valids);
4761
       /* NOTE(review): ONE_MORE_BYTE is defined elsewhere; it
          presumably jumps to no_more_source when the source is
          exhausted -- confirm.  */
4762   while (1)
4763     {
4764       int c;
4765       Lisp_Object val;
4766       struct charset *charset;
4767       int dim;
4768       int len = 1;
4769       unsigned code;
4770
           /* Remember where this code starts, so that an invalid code
              can be rewound and an unfinished one is not reported as
              consumed.  */
4771       src_base = src;
4772       consumed_chars_base = consumed_chars;
4773
4774       if (charbuf >= charbuf_end)
4775         break;
4776
4777       ONE_MORE_BYTE (c);
4778       if (c < 0)
4779         goto invalid_code;
4780       code = c;
4781
4782       val = AREF (valids, c);
4783       if (NILP (val))
4784         goto invalid_code;
4785       if (INTEGERP (val))
4786         {
               /* VAL is a single charset ID.  Read the remaining bytes
                  of the code, most significant byte first.
                  NOTE(review): a negative C from ONE_MORE_BYTE is
                  merged into CODE without a check here -- confirm that
                  this is intended.  */
4787           charset = CHARSET_FROM_ID (XFASTINT (val));
4788           dim = CHARSET_DIMENSION (charset);
4789           while (len < dim)
4790             {
4791               ONE_MORE_BYTE (c);
4792               code = (code << 8) | c;
4793               len++;
4794             }
4795           CODING_DECODE_CHAR (coding, src, src_base, src_end,
4796                               charset, code, c);
4797         }
4798       else
4799         {
4800           /* VAL is a list of charset IDs.  It is assured that the
4801              list is sorted by charset dimensions (smaller one
4802              comes first).  */
               /* LEN and CODE are kept across candidates, so bytes are
                  only added when a later charset has a larger
                  dimension.  The first candidate for which C is
                  non-negative after CODING_DECODE_CHAR wins.  */
4803           while (CONSP (val))
4804             {
4805               charset = CHARSET_FROM_ID (XFASTINT (XCAR (val)));
4806               dim = CHARSET_DIMENSION (charset);
4807               while (len < dim)
4808                 {
4809                   ONE_MORE_BYTE (c);
4810                   code = (code << 8) | c;
4811                   len++;
4812                 }
4813               CODING_DECODE_CHAR (coding, src, src_base,
4814                                   src_end, charset, code, c);
4815               if (c >= 0)
4816                 break;
4817               val = XCDR (val);
4818             }
4819         }
4820       if (c < 0)
4821         goto invalid_code;
           /* A character of a different non-ASCII charset starts a new
              run; the previous run, if any, is closed with
              ADD_CHARSET_DATA.  */
4822       if (charset->id != charset_ascii
4823           && last_id != charset->id)
4824         {
4825           if (last_id != charset_ascii)
4826             ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
4827           last_id = charset->id;
4828           last_offset = char_offset;
4829         }
4830
4831       *charbuf++ = c;
4832       char_offset++;
4833       continue;
4834
4835     invalid_code:
           /* Go back to the start of the code and store only its first
              byte: negated if ONE_MORE_BYTE returned it as a negative
              value, unchanged if ASCII, otherwise converted with
              BYTE8_TO_CHAR.  */
4836       src = src_base;
4837       consumed_chars = consumed_chars_base;
4838       ONE_MORE_BYTE (c);
4839       *charbuf++ = c < 0 ? -c : ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
4840       char_offset++;
4841       coding->errors++;
4842     }
4843
4844  no_more_source:
       /* Close the last open charset run, then report progress up to
          the start of the last code that was begun.  */
4845   if (last_id != charset_ascii)
4846     ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
4847   coding->consumed_char += consumed_chars_base;
4848   coding->consumed = src_base - coding->source;
4849   coding->charbuf_used = charbuf - coding->charbuf;
4850 }
4851
     /* Encode the characters in CODING->charbuf with the charsets of a
        charset-based coding system and store the bytes at
        CODING->destination.  ASCII characters are emitted unchanged
        when the coding system is ASCII compatible, and eight-bit
        characters are emitted as the byte given by CHAR_TO_BYTE8.  For
        any other character the code found by char_charset is emitted
        most significant byte first, using as many bytes as the
        dimension of the charset.  A character for which char_charset
        finds no charset is replaced by
        CODING_INHIBIT_CHARACTER_SUBSTITUTION in safe-encoding mode and
        by CODING->default_char otherwise.  A successful result is
        recorded and 0 is returned.  */
4852 static int
4853 encode_coding_charset (coding)
4854      struct coding_system *coding;
4855 {
4856   int multibytep = coding->dst_multibyte;
4857   int *charbuf = coding->charbuf;
4858   int *charbuf_end = charbuf + coding->charbuf_used;
4859   unsigned char *dst = coding->destination + coding->produced;
4860   unsigned char *dst_end = coding->destination + coding->dst_bytes;
4861   int safe_room = MAX_MULTIBYTE_LENGTH;
4862   int produced_chars = 0;
4863   Lisp_Object attrs, charset_list;
4864   int ascii_compatible;
4865   int c;
4866
4867   CODING_GET_INFO (coding, attrs, charset_list);
4868   ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
4869
       /* NOTE(review): ASSURE_DESTINATION and the EMIT_XXX macros are
          defined elsewhere; they presumably use the locals DST,
          DST_END, MULTIBYTEP and PRODUCED_CHARS -- confirm before
          renaming any of them.  */
4870   while (charbuf < charbuf_end)
4871     {
4872       struct charset *charset;
4873       unsigned code;
4874
4875       ASSURE_DESTINATION (safe_room);
4876       c = *charbuf++;
4877       if (ascii_compatible && ASCII_CHAR_P (c))
4878         EMIT_ONE_ASCII_BYTE (c);
4879       else if (CHAR_BYTE8_P (c))
4880         {
4881           c = CHAR_TO_BYTE8 (c);
4882           EMIT_ONE_BYTE (c);
4883         }
4884       else
4885         {
               /* NOTE(review): char_charset presumably returns the
                  first charset of CHARSET_LIST that contains C and
                  stores the code point in CODE, or NULL if there is
                  none -- confirm.  */
4886           charset = char_charset (c, charset_list, &code);
4887           if (charset)
4888             {
4889               if (CHARSET_DIMENSION (charset) == 1)
4890                 EMIT_ONE_BYTE (code);
4891               else if (CHARSET_DIMENSION (charset) == 2)
4892                 EMIT_TWO_BYTES (code >> 8, code & 0xFF);
4893               else if (CHARSET_DIMENSION (charset) == 3)
4894                 EMIT_THREE_BYTES (code >> 16, (code >> 8) & 0xFF, code & 0xFF);
4895               else
4896                 EMIT_FOUR_BYTES (code >> 24, (code >> 16) & 0xFF,
4897                                  (code >> 8) & 0xFF, code & 0xFF);
4898             }
4899           else
4900             {
                   /* No charset was found for C: emit a substitute.  */
4901               if (coding->mode & CODING_MODE_SAFE_ENCODING)
4902                 c = CODING_INHIBIT_CHARACTER_SUBSTITUTION;
4903               else
4904                 c = coding->default_char;
4905               EMIT_ONE_BYTE (c);
4906             }
4907         }
4908     }
4909
4910   record_conversion_result (coding, CODING_RESULT_SUCCESS);
4911   coding->produced_char += produced_chars;
4912   coding->produced = dst - coding->destination;
4913   return 0;
4914 }
4915
4916 \f
4917 /*** 10. C library functions ***/
4918
4919 /* Setup coding context CODING from information about CODING_SYSTEM.
4920    If CODING_SYSTEM is nil, `no-conversion' is assumed.  If
4921    CODING_SYSTEM is invalid, signal an error.
        This resets CODING->mode, CODING->head_ascii and
        CODING->common_flags, caches the safe-charsets string and the
        default character of the coding system, and installs the
        detector, decoder and encoder functions that belong to its
        coding type together with the type-specific state.  */
4922
4923 void
4924 setup_coding_system (coding_system, coding)
4925      Lisp_Object coding_system;
4926      struct coding_system *coding;
4927 {
4928   Lisp_Object attrs;
4929   Lisp_Object eol_type;
4930   Lisp_Object coding_type;
4931   Lisp_Object val;
4932
4933   if (NILP (coding_system))
4934     coding_system = Qno_conversion;
4935
       /* NOTE(review): this macro is defined elsewhere; it presumably
          stores the ID of CODING_SYSTEM in CODING->id and signals an
          error for an invalid coding system -- confirm.  */
4936   CHECK_CODING_SYSTEM_GET_ID (coding_system, coding->id);
4937
4938   attrs = CODING_ID_ATTRS (coding->id);
4939   eol_type = CODING_ID_EOL_TYPE (coding->id);
4940
4941   coding->mode = 0;
4942   coding->head_ascii = -1;
       /* An eol type that is still a vector means the end-of-line
          format has to be detected.  */
4943   coding->common_flags
4944     = (VECTORP (eol_type) ? CODING_REQUIRE_DETECTION_MASK : 0);
4945   if (! NILP (CODING_ATTR_POST_READ (attrs)))
4946     coding->common_flags |= CODING_REQUIRE_DECODING_MASK;
4947   if (! NILP (CODING_ATTR_PRE_WRITE (attrs)))
4948     coding->common_flags |= CODING_REQUIRE_ENCODING_MASK;
4949   if (! NILP (CODING_ATTR_FOR_UNIBYTE (attrs)))
4950     coding->common_flags |= CODING_FOR_UNIBYTE_MASK;
4951
       /* The safe-charsets attribute is a string indexed by charset
          ID; keep a pointer to its data and its largest valid index.  */
4952   val = CODING_ATTR_SAFE_CHARSETS (attrs);
4953   coding->max_charset_id = SCHARS (val) - 1;
4954   coding->safe_charsets = (char *) SDATA (val);
4955   coding->default_char = XINT (CODING_ATTR_DEFAULT_CHAR (attrs));
4956
4957   coding_type = CODING_ATTR_TYPE (attrs);
4958   if (EQ (coding_type, Qundecided))
4959     {
           /* Use the raw-text handlers and request detection.  */
4960       coding->detector = NULL;
4961       coding->decoder = decode_coding_raw_text;
4962       coding->encoder = encode_coding_raw_text;
4963       coding->common_flags |= CODING_REQUIRE_DETECTION_MASK;
4964     }
4965   else if (EQ (coding_type, Qiso_2022))
4966     {
4967       int i;
4968       int flags = XINT (AREF (attrs, coding_attr_iso_flags));
4969
4970       /* Invoke graphic register 0 to plane 0.  */
4971       CODING_ISO_INVOCATION (coding, 0) = 0;
4972       /* Invoke graphic register 1 to plane 1 if we can use 8-bit.  */
4973       CODING_ISO_INVOCATION (coding, 1)
4974         = (flags & CODING_ISO_FLAG_SEVEN_BITS ? -1 : 1);
4975       /* Setup the initial status of designation.  */
4976       for (i = 0; i < 4; i++)
4977         CODING_ISO_DESIGNATION (coding, i) = CODING_ISO_INITIAL (coding, i);
4978       /* Not single shifting initially.  */
4979       CODING_ISO_SINGLE_SHIFTING (coding) = 0;
4980       /* Beginning of buffer should also be regarded as bol. */
4981       CODING_ISO_BOL (coding) = 1;
4982       coding->detector = detect_coding_iso_2022;
4983       coding->decoder = decode_coding_iso_2022;
4984       coding->encoder = encode_coding_iso_2022;
4985       if (flags & CODING_ISO_FLAG_SAFE)
4986         coding->mode |= CODING_MODE_SAFE_ENCODING;
4987       coding->common_flags
4988         |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK
4989             | CODING_REQUIRE_FLUSHING_MASK);
4990       if (flags & CODING_ISO_FLAG_COMPOSITION)
4991         coding->common_flags |= CODING_ANNOTATE_COMPOSITION_MASK;
4992       if (flags & CODING_ISO_FLAG_DESIGNATION)
4993         coding->common_flags |= CODING_ANNOTATE_CHARSET_MASK;
4994       if (flags & CODING_ISO_FLAG_FULL_SUPPORT)
4995         {
               /* The safe-charsets attribute is read again after
                  setup_iso_safe_charsets, which presumably updates it
                  -- confirm.  */
4996           setup_iso_safe_charsets (attrs);
4997           val = CODING_ATTR_SAFE_CHARSETS (attrs);
4998           coding->max_charset_id = SCHARS (val) - 1;
4999           coding->safe_charsets = (char *) SDATA (val);
5000         }
5001       CODING_ISO_FLAGS (coding) = flags;
5002     }
5003   else if (EQ (coding_type, Qcharset))
5004     {
5005       coding->detector = detect_coding_charset;
5006       coding->decoder = decode_coding_charset;
5007       coding->encoder = encode_coding_charset;
5008       coding->common_flags
5009         |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
5010     }
5011   else if (EQ (coding_type, Qutf_8))
5012     {
5013       coding->detector = detect_coding_utf_8;
5014       coding->decoder = decode_coding_utf_8;
5015       coding->encoder = encode_coding_utf_8;
5016       coding->common_flags
5017         |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
5018     }
5019   else if (EQ (coding_type, Qutf_16))
5020     {
           /* BOM attribute: a cons requests BOM detection, t means the
              text has a BOM, anything else means it has none.  */
5021       val = AREF (attrs, coding_attr_utf_16_bom);
5022       CODING_UTF_16_BOM (coding) = (CONSP (val) ? utf_16_detect_bom
5023                                     : EQ (val, Qt) ? utf_16_with_bom
5024                                     : utf_16_without_bom);
           /* Any endian attribute other than `big' selects little
              endian.  */
5025       val = AREF (attrs, coding_attr_utf_16_endian);
5026       CODING_UTF_16_ENDIAN (coding) = (EQ (val, Qbig) ? utf_16_big_endian
5027                                        : utf_16_little_endian);
5028       CODING_UTF_16_SURROGATE (coding) = 0;
5029       coding->detector = detect_coding_utf_16;
5030       coding->decoder = decode_coding_utf_16;
5031       coding->encoder = encode_coding_utf_16;
5032       coding->common_flags
5033         |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
5034       if (CODING_UTF_16_BOM (coding) == utf_16_detect_bom)
5035         coding->common_flags |= CODING_REQUIRE_DETECTION_MASK;
5036     }
5037   else if (EQ (coding_type, Qccl))
5038     {
5039       coding->detector = detect_coding_ccl;
5040       coding->decoder = decode_coding_ccl;
5041       coding->encoder = encode_coding_ccl;
5042       coding->common_flags
5043         |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK
5044             | CODING_REQUIRE_FLUSHING_MASK);
5045     }
5046   else if (EQ (coding_type, Qemacs_mule))
5047     {
5048       coding->detector = detect_coding_emacs_mule;
5049       coding->decoder = decode_coding_emacs_mule;
5050       coding->encoder = encode_coding_emacs_mule;
5051       coding->common_flags
5052         |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
5053       if (! NILP (AREF (attrs, coding_attr_emacs_mule_full))
5054           && ! EQ (CODING_ATTR_CHARSET_LIST (attrs), Vemacs_mule_charset_list))
5055         {
               /* Build a fresh safe-charsets string: every entry is
                  255 except those of the charsets listed in
                  Vemacs_mule_charset_list, which are 0.
                  NOTE(review): CODING->safe_charsets then points into
                  a Lisp string that this function stores nowhere else
                  -- confirm that it stays alive long enough.  */
5056           Lisp_Object tail, safe_charsets;
5057           int max_charset_id = 0;
5058
5059           for (tail = Vemacs_mule_charset_list; CONSP (tail);
5060                tail = XCDR (tail))
5061             if (max_charset_id < XFASTINT (XCAR (tail)))
5062               max_charset_id = XFASTINT (XCAR (tail));
5063           safe_charsets = Fmake_string (make_number (max_charset_id + 1),
5064                                         make_number (255));
5065           for (tail = Vemacs_mule_charset_list; CONSP (tail);
5066                tail = XCDR (tail))
5067             SSET (safe_charsets, XFASTINT (XCAR (tail)), 0);
5068           coding->max_charset_id = max_charset_id;
5069           coding->safe_charsets = (char *) SDATA (safe_charsets);
5070         }
5071     }
5072   else if (EQ (coding_type, Qshift_jis))
5073     {
5074       coding->detector = detect_coding_sjis;
5075       coding->decoder = decode_coding_sjis;
5076       coding->encoder = encode_coding_sjis;
5077       coding->common_flags
5078         |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
5079     }
5080   else if (EQ (coding_type, Qbig5))
5081     {
5082       coding->detector = detect_coding_big5;
5083       coding->decoder = decode_coding_big5;
5084       coding->encoder = encode_coding_big5;
5085       coding->common_flags
5086         |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
5087     }
5088   else                          /* EQ (coding_type, Qraw_text) */
5089     {
           /* Every remaining type gets the raw-text handlers and no
              detector; no extra flag is added.  */
5090       coding->detector = NULL;
5091       coding->decoder = decode_coding_raw_text;
5092       coding->encoder = encode_coding_raw_text;
5093     }
5094
5095   return;
5096 }
5097
5098 /* Return the raw-text coding system (raw-text itself or one of its
5099    subsidiaries) whose eol_type matches that of CODING-SYSTEM.  */
5100
5101 Lisp_Object
5102 raw_text_coding_system (coding_system)
5103      Lisp_Object coding_system;
5104 {
5105   Lisp_Object cs_spec, cs_eol, raw_variants;
5106
5107   if (NILP (coding_system))
5108     return Qraw_text;
5109
5110   cs_spec = CODING_SYSTEM_SPEC (coding_system);
5111   if (EQ (CODING_ATTR_TYPE (AREF (cs_spec, 0)), Qraw_text))
5112     return coding_system;
5113
5114   cs_eol = AREF (cs_spec, 2);
5115   if (VECTORP (cs_eol))
5116     return Qraw_text;
5117
5118   raw_variants = AREF (CODING_SYSTEM_SPEC (Qraw_text), 2);
5119   if (EQ (cs_eol, Qunix))
5120     return AREF (raw_variants, 0);
5121   if (EQ (cs_eol, Qdos))
5122     return AREF (raw_variants, 1);
5123   return AREF (raw_variants, 2);
5124 }
5125
5126
5127 /* If CODING_SYSTEM doesn't specify an end-of-line format but PARENT
5128    does, return the subsidiary of CODING_SYSTEM that has the same
5129    eol-spec as PARENT.  Otherwise, return CODING_SYSTEM.  A nil
        CODING_SYSTEM is treated as `raw-text'.  PARENT, when non-nil,
        is assumed to name a defined coding system.  */
5130
5131 Lisp_Object
5132 coding_inherit_eol_type (coding_system, parent)
5133      Lisp_Object coding_system, parent;
5134 {
5135   Lisp_Object spec, eol_type;
5136
5137   if (NILP (coding_system))
5138     coding_system = Qraw_text;
5139   spec = CODING_SYSTEM_SPEC (coding_system);
5140   eol_type = AREF (spec, 2);
       /* A vector eol type means the format is still undecided; its
          elements 0, 1 and 2 are the unix, dos and mac subsidiaries.  */
5141   if (VECTORP (eol_type)
5142       && ! NILP (parent))
5143     {
5144       Lisp_Object parent_spec;
5145       Lisp_Object parent_eol_type;
5146
5147       /* Consult PARENT itself, not the default buffer coding system.  */
5148       parent_spec = CODING_SYSTEM_SPEC (parent);
5149       parent_eol_type = AREF (parent_spec, 2);
5150       if (EQ (parent_eol_type, Qunix))
5151         coding_system = AREF (eol_type, 0);
5152       else if (EQ (parent_eol_type, Qdos))
5153         coding_system = AREF (eol_type, 1);
5154       else if (EQ (parent_eol_type, Qmac))
5155         coding_system = AREF (eol_type, 2);
5156     }
5157   return coding_system;
5158 }
5159
5160 /* Emacs has a mechanism to automatically detect a coding system if it
5161    is one of Emacs' internal format, ISO2022, SJIS, and BIG5.  But,
5162    it's impossible to distinguish some coding systems accurately
5163    because they use the same range of codes.  So, at first, coding
5164    systems are categorized into the following categories:
5165
5166    o coding-category-emacs-mule
5167
5168         The category for a coding system which has the same code range
5169         as Emacs' internal format.  Assigned the coding-system (Lisp
5170         symbol) `emacs-mule' by default.
5171
5172    o coding-category-sjis
5173
5174         The category for a coding system which has the same code range
5175         as SJIS.  Assigned the coding-system (Lisp
5176         symbol) `japanese-shift-jis' by default.
5177
5178    o coding-category-iso-7
5179
5180         The category for a coding system which has the same code range
5181         as ISO2022 of 7-bit environment.  This doesn't use any locking
5182         shift and single shift functions.  This can encode/decode all
5183         charsets.  Assigned the coding-system (Lisp symbol)
5184         `iso-2022-7bit' by default.
5185
5186    o coding-category-iso-7-tight
5187
5188         Same as coding-category-iso-7 except that this can
5189         encode/decode only the specified charsets.
5190
5191    o coding-category-iso-8-1
5192
5193         The category for a coding system which has the same code range
5194         as ISO2022 of 8-bit environment and graphic plane 1 used only
5195         for DIMENSION1 charset.  This doesn't use any locking shift
5196         and single shift functions.  Assigned the coding-system (Lisp
5197         symbol) `iso-latin-1' by default.
5198
5199    o coding-category-iso-8-2
5200
5201         The category for a coding system which has the same code range
5202         as ISO2022 of 8-bit environment and graphic plane 1 used only
5203         for DIMENSION2 charset.  This doesn't use any locking shift
5204         and single shift functions.  Assigned the coding-system (Lisp
5205         symbol) `japanese-iso-8bit' by default.
5206
5207    o coding-category-iso-7-else
5208
5209         The category for a coding system which has the same code range
5210         as ISO2022 of 7-bit environment but uses locking shift or
5211         single shift functions.  Assigned the coding-system (Lisp
5212         symbol) `iso-2022-7bit-lock' by default.
5213
5214    o coding-category-iso-8-else
5215
5216         The category for a coding system which has the same code range
5217         as ISO2022 of 8-bit environment but uses locking shift or
5218         single shift functions.  Assigned the coding-system (Lisp
5219         symbol) `iso-2022-8bit-ss2' by default.
5220
5221    o coding-category-big5
5222
5223         The category for a coding system which has the same code range
5224         as BIG5.  Assigned the coding-system (Lisp symbol)
5225         `cn-big5' by default.
5226
5227    o coding-category-utf-8
5228
5229         The category for a coding system which has the same code range
5230         as UTF-8 (cf. RFC3629).  Assigned the coding-system (Lisp
5231         symbol) `utf-8' by default.
5232
5233    o coding-category-utf-16-be
5234
5235         The category for a coding system in which a text has an
5236         Unicode signature (cf. Unicode Standard) in the order of BIG
5237         endian at the head.  Assigned the coding-system (Lisp symbol)
5238         `utf-16-be' by default.
5239
5240    o coding-category-utf-16-le
5241
5242         The category for a coding system in which a text has an
5243         Unicode signature (cf. Unicode Standard) in the order of
5244         LITTLE endian at the head.  Assigned the coding-system (Lisp
5245         symbol) `utf-16-le' by default.
5246
5247    o coding-category-ccl
5248
5249         The category for a coding system of which encoder/decoder is
5250         written in CCL programs.  The default value is nil, i.e., no
5251         coding system is assigned.
5252
5253    o coding-category-binary
5254
5255         The category for a coding system not categorized in any of the
5256         above.  Assigned the coding-system (Lisp symbol)
5257         `no-conversion' by default.
5258
5259    Each of them is a Lisp symbol and the value is an actual
5260    `coding-system's (this is also a Lisp symbol) assigned by a user.
5261    What Emacs does actually is to detect a category of coding system.
5262    Then, it uses a `coding-system' assigned to it.  If Emacs can't
5263    decide only one possible category, it selects a category of the
5264    highest priority.  Priorities of categories are also specified by a
5265    user in a Lisp variable `coding-category-list'.
5266
5267 */
5268
5269 #define EOL_SEEN_NONE   0  /* No end-of-line sequence was found.  */
5270 #define EOL_SEEN_LF     1  /* LF alone was found.  */
5271 #define EOL_SEEN_CR     2  /* CR not followed by LF was found.  */
5272 #define EOL_SEEN_CRLF   4  /* CR immediately followed by LF was found.  */
5273
5274 /* Detect how end-of-line of a text of length SRC_BYTES pointed by
5275    SOURCE is encoded.  If CATEGORY is one of
5276    coding_category_utf_16_XXXX, assume that CR and LF are encoded by
5277    two-byte, else they are encoded by one-byte.
5278
5279    Return one of EOL_SEEN_XXX.  At most MAX_EOL_CHECK_COUNT
        end-of-line sequences are examined; if they do not all have the
        same format, EOL_SEEN_LF is returned.  */
5280
5281 #define MAX_EOL_CHECK_COUNT 3
5282
5283 static int
5284 detect_eol (source, src_bytes, category)
5285      const unsigned char *source;
5286      EMACS_INT src_bytes;
5287      enum coding_category category;
5288 {
5289   const unsigned char *src = source, *src_end = src + src_bytes;
5290   unsigned char c;
5291   int total  = 0;
5292   int eol_seen = EOL_SEEN_NONE;
5293
5294   if ((1 << category) & CATEGORY_MASK_UTF_16)
5295     {
           /* Offsets, within a two-byte unit, of the most and the least
              significant byte.  */
5296       int msb, lsb;
5297
           /* Little endian puts the most significant byte second.
              CATEGORY is a single enum value, so it is compared with
              each little-endian category separately.  */
5298       msb = (category == coding_category_utf_16_le
5299              || category == coding_category_utf_16_le_nosig);
5300       lsb = 1 - msb;
5301
5302       while (src + 1 < src_end)
5303         {
5304           c = src[lsb];
5305           if (src[msb] == 0 && (c == '\n' || c == '\r'))
5306             {
5307               int this_eol;
5308
5309               if (c == '\n')
5310                 this_eol = EOL_SEEN_LF;
5311               else if (src + 3 >= src_end
5312                        || src[msb + 2] != 0
5313                        || src[lsb + 2] != '\n')
5314                 this_eol = EOL_SEEN_CR;
5315               else
                     /* Step over the LF unit too, as the one-byte
                        branch does, so that it is not counted again as
                        a separate LF.  */
5316                 this_eol = EOL_SEEN_CRLF, src += 2;
5317
5318               if (eol_seen == EOL_SEEN_NONE)
5319                 /* This is the first end-of-line.  */
5320                 eol_seen = this_eol;
5321               else if (eol_seen != this_eol)
5322                 {
5323                   /* The found type is different from what found before.  */
5324                   eol_seen = EOL_SEEN_LF;
5325                   break;
5326                 }
5327               if (++total == MAX_EOL_CHECK_COUNT)
5328                 break;
5329             }
5330           src += 2;
5331         }
5332     }
5333   else
5334     {
5335       while (src < src_end)
5336         {
5337           c = *src++;
5338           if (c == '\n' || c == '\r')
5339             {
5340               int this_eol;
5341
5342               if (c == '\n')
5343                 this_eol = EOL_SEEN_LF;
5344               else if (src >= src_end || *src != '\n')
5345                 this_eol = EOL_SEEN_CR;
5346               else
5347                 this_eol = EOL_SEEN_CRLF, src++;
5348
5349               if (eol_seen == EOL_SEEN_NONE)
5350                 /* This is the first end-of-line.  */
5351                 eol_seen = this_eol;
5352               else if (eol_seen != this_eol)
5353                 {
5354                   /* The found type is different from what found before.  */
5355                   eol_seen = EOL_SEEN_LF;
5356                   break;
5357                 }
5358               if (++total == MAX_EOL_CHECK_COUNT)
5359                 break;
5360             }
5361         }
5362     }
5363   return eol_seen;
5364 }
5365
5366
     /* Switch CODING to the subsidiary coding system that matches the
        end-of-line format reported in EOL_SEEN (a combination of
        EOL_SEEN_XXX) and return Qunix, Qdos or Qmac accordingly.  The
        eol type of CODING is indexed as a vector.  If EOL_SEEN has none
        of the three bits set, CODING is not changed and its eol type is
        returned as is.  */
5367 static Lisp_Object
5368 adjust_coding_eol_type (coding, eol_seen)
5369      struct coding_system *coding;
5370      int eol_seen;
5371 {
5372   Lisp_Object eol_vector, detected;
5373   int slot;
5374
5375   eol_vector = CODING_ID_EOL_TYPE (coding->id);
5376
5377   /* LF is tested first, then CRLF, then CR.  */
5378   if (eol_seen & EOL_SEEN_LF)
5379     slot = 0, detected = Qunix;
5380   else if (eol_seen & EOL_SEEN_CRLF)
5381     slot = 1, detected = Qdos;
5382   else if (eol_seen & EOL_SEEN_CR)
5383     slot = 2, detected = Qmac;
5384   else
5385     /* No eol format was seen; CODING is left untouched.  */
5386     return eol_vector;
5387
5388   /* Switch CODING to the subsidiary stored in the chosen slot.  */
5389   coding->id = CODING_SYSTEM_ID (AREF (eol_vector, slot));
5390   return detected;
5391 }
5392
5393 /* Detect how a text specified in CODING is encoded.  If a coding
5394    system is detected, update fields of CODING by the detected coding
5395    system.  */
5396
5397 void
5398 detect_coding (coding)
5399      struct coding_system *coding;
5400 {
5401   const unsigned char *src, *src_end;
5402   Lisp_Object attrs, coding_type;
5403
5404   coding->consumed = coding->consumed_char = 0;
5405   coding->produced = coding->produced_char = 0;
5406   coding_set_source (coding);
5407
5408   src_end = coding->source + coding->src_bytes;
5409
5410   /* If we have not yet decided the text encoding type, detect it
5411      now.  */
5412   if (EQ (CODING_ATTR_TYPE (CODING_ID_ATTRS (coding->id)), Qundecided))
5413     {
5414       int c, i;
5415
5416       for (i = 0, src = coding->source; src < src_end; i++, src++)
5417         {
5418           c = *src;
5419           if (c & 0x80 || (c < 0x20 && (c == ISO_CODE_ESC
5420                                         || c == ISO_CODE_SI
5421                                         || c == ISO_CODE_SO)))
5422             break;
5423         }
5424       coding->head_ascii = src - (coding->source + coding->consumed);
5425
5426       if (coding->head_ascii < coding->src_bytes)
5427         {
5428           struct coding_detection_info detect_info;
5429           enum coding_category category;
5430           struct coding_system *this;
5431
5432           detect_info.checked = detect_info.found = detect_info.rejected = 0;
5433           for (i = 0; i < coding_category_raw_text; i++)
5434             {
5435               category = coding_priorities[i];
5436               this = coding_categories + category;
5437               if (this->id < 0)
5438                 {
5439                   /* No coding system of this category is defined.  */
5440                   detect_info.rejected |= (1 << category);
5441                 }
5442               else if (category >= coding_category_raw_text)
5443                 continue;
5444               else if (detect_info.checked & (1 << category))
5445                 {
5446                   if (detect_info.found & (1 << category))
5447                     break;
5448                 }
5449               else if ((*(this->detector)) (coding, &detect_info)
5450                        && detect_info.found & (1 << category))
5451                 {
5452                   if (category == coding_category_utf_16_auto)
5453                     {
5454                       if (detect_info.found & CATEGORY_MASK_UTF_16_LE)
5455                         category = coding_category_utf_16_le;
5456                       else
5457                         category = coding_category_utf_16_be;
5458                     }
5459                   break;
5460                 }
5461             }
5462           if (i < coding_category_raw_text)
5463             setup_coding_system (CODING_ID_NAME (this->id), coding);
5464           else if (detect_info.rejected == CATEGORY_MASK_ANY)
5465             setup_coding_system (Qraw_text, coding);
5466           else if (detect_info.rejected)
5467             for (i = 0; i < coding_category_raw_text; i++)
5468               if (! (detect_info.rejected & (1 << coding_priorities[i])))
5469                 {
5470                   this = coding_categories + coding_priorities[i];
5471                   setup_coding_system (CODING_ID_NAME (this->id), coding);
5472                   break;
5473                 }
5474         }
5475     }
5476   else if (XINT (CODING_ATTR_CATEGORY (CODING_ID_ATTRS (coding->id)))
5477            == coding_category_utf_16_auto)
5478     {
5479       Lisp_Object coding_systems;
5480       struct coding_detection_info detect_info;
5481
5482       coding_systems
5483         = AREF (CODING_ID_ATTRS (coding->id), coding_attr_utf_16_bom);
5484       detect_info.found = detect_info.rejected = 0;
5485       if (CONSP (coding_systems)
5486           && detect_coding_utf_16 (coding, &detect_info))
5487         {
5488           if (detect_info.found & CATEGORY_MASK_UTF_16_LE)
5489             setup_coding_system (XCAR (coding_systems), coding);
5490           else if (detect_info.found & CATEGORY_MASK_UTF_16_BE)
5491             setup_coding_system (XCDR (coding_systems), coding);
5492         }
5493     }
5494 }
5495
5496
5497 static void
5498 decode_eol (coding)
5499      struct coding_system *coding;
5500 {
5501   Lisp_Object eol_type;
5502   unsigned char *p, *pbeg, *pend;
5503
5504   eol_type = CODING_ID_EOL_TYPE (coding->id);
5505   if (EQ (eol_type, Qunix))
5506     return;
5507
5508   if (NILP (coding->dst_object))
5509     pbeg = coding->destination;
5510   else
5511     pbeg = BYTE_POS_ADDR (coding->dst_pos_byte);
5512   pend = pbeg + coding->produced;
5513
5514   if (VECTORP (eol_type))
5515     {
5516       int eol_seen = EOL_SEEN_NONE;
5517
5518       for (p = pbeg; p < pend; p++)
5519         {
5520           if (*p == '\n')
5521             eol_seen |= EOL_SEEN_LF;
5522           else if (*p == '\r')
5523             {
5524               if (p + 1 < pend && *(p + 1) == '\n')
5525                 {
5526                   eol_seen |= EOL_SEEN_CRLF;
5527                   p++;
5528                 }
5529               else
5530                 eol_seen |= EOL_SEEN_CR;
5531             }
5532         }
5533       if (eol_seen != EOL_SEEN_NONE
5534           && eol_seen != EOL_SEEN_LF
5535           && eol_seen != EOL_SEEN_CRLF
5536           && eol_seen != EOL_SEEN_CR)
5537         eol_seen = EOL_SEEN_LF;
5538       if (eol_seen != EOL_SEEN_NONE)
5539         eol_type = adjust_coding_eol_type (coding, eol_seen);
5540     }
5541
5542   if (EQ (eol_type, Qmac))
5543     {
5544       for (p = pbeg; p < pend; p++)
5545         if (*p == '\r')
5546           *p = '\n';
5547     }
5548   else if (EQ (eol_type, Qdos))
5549     {
5550       int n = 0;
5551
5552       if (NILP (coding->dst_object))
5553         {
5554           for (p = pend - 2; p >= pbeg; p--)
5555             if (*p == '\r')
5556               {
5557                 safe_bcopy ((char *) (p + 1), (char *) p, pend-- - p - 1);
5558                 n++;
5559               }
5560         }
5561       else
5562         {
5563           for (p = pend - 2; p >= pbeg; p--)
5564             if (*p == '\r')
5565               {
5566                 int pos_byte = coding->dst_pos_byte + (p - pbeg);
5567                 int pos = BYTE_TO_CHAR (pos_byte);
5568
5569                 del_range_2 (pos, pos_byte, pos + 1, pos_byte + 1, 0);
5570                 n++;
5571               }
5572         }
5573       coding->produced -= n;
5574       coding->produced_char -= n;
5575     }
5576 }
5577
5578
5579 /* Return a translation table (or list of them) from coding system
5580    attribute vector ATTRS for encoding (ENCODEP is nonzero) or
5581    decoding (ENCODEP is zero). */
5582
5583 static Lisp_Object
5584 get_translation_table (attrs, encodep, max_lookup)
5585      Lisp_Object attrs;
5586      int encodep, *max_lookup;
5587 {
5588   Lisp_Object standard, translation_table;
5589   Lisp_Object val;
5590
5591   if (encodep)
5592     translation_table = CODING_ATTR_ENCODE_TBL (attrs),
5593       standard = Vstandard_translation_table_for_encode;
5594   else
5595     translation_table = CODING_ATTR_DECODE_TBL (attrs),
5596       standard = Vstandard_translation_table_for_decode;
5597   if (NILP (translation_table))
5598     translation_table = standard;
5599   else
5600     {
5601       if (SYMBOLP (translation_table))
5602         translation_table = Fget (translation_table, Qtranslation_table);
5603       else if (CONSP (translation_table))
5604         {
5605           translation_table = Fcopy_sequence (translation_table);
5606           for (val = translation_table; CONSP (val); val = XCDR (val))
5607             if (SYMBOLP (XCAR (val)))
5608               XSETCAR (val, Fget (XCAR (val), Qtranslation_table));
5609         }
5610       if (CHAR_TABLE_P (standard))
5611         {
5612           if (CONSP (translation_table))
5613             translation_table = nconc2 (translation_table,
5614                                         Fcons (standard, Qnil));
5615           else
5616             translation_table = Fcons (translation_table,
5617                                        Fcons (standard, Qnil));
5618         }
5619     }
5620
5621   if (max_lookup)
5622     {
5623       *max_lookup = 1;
5624       if (CHAR_TABLE_P (translation_table)
5625           && CHAR_TABLE_EXTRA_SLOTS (XCHAR_TABLE (translation_table)) > 1)
5626         {
5627           val = XCHAR_TABLE (translation_table)->extras[1];
5628           if (NATNUMP (val) && *max_lookup < XFASTINT (val))
5629             *max_lookup = XFASTINT (val);
5630         }
5631       else if (CONSP (translation_table))
5632         {
5633           Lisp_Object tail, val;
5634
5635           for (tail = translation_table; CONSP (tail); tail = XCDR (tail))
5636             if (CHAR_TABLE_P (XCAR (tail))
5637                 && CHAR_TABLE_EXTRA_SLOTS (XCHAR_TABLE (XCAR (tail))) > 1)
5638               {
5639                 val = XCHAR_TABLE (XCAR (tail))->extras[1];
5640                 if (NATNUMP (val) && *max_lookup < XFASTINT (val))
5641                   *max_lookup = XFASTINT (val);
5642               }
5643         }
5644     }
5645   return translation_table;
5646 }
5647
/* Look up character C in TABLE, which is a char-table or a list whose
   char-table elements are consulted in order.  Whenever a table maps
   C to a character, C is replaced by that character and TRANS is
   reset to nil.  When a table of a list maps C to some other non-nil
   value, the search stops with TRANS set to that value.  A single
   char-table leaves its non-character value, nil included, in TRANS.
   C and TRANS must be lvalues.  */

#define LOOKUP_TRANSLATION_TABLE(table, c, trans)                       \
  do {                                                                  \
    Lisp_Object lookup_rest_ = (table);                                 \
                                                                        \
    (trans) = Qnil;                                                     \
    if (CHAR_TABLE_P (lookup_rest_))                                    \
      {                                                                 \
        (trans) = CHAR_TABLE_REF (lookup_rest_, (c));                   \
        if (CHARACTERP (trans))                                         \
          {                                                             \
            (c) = XFASTINT (trans);                                     \
            (trans) = Qnil;                                             \
          }                                                             \
      }                                                                 \
    else                                                                \
      while (CONSP (lookup_rest_))                                      \
        {                                                               \
          if (CHAR_TABLE_P (XCAR (lookup_rest_)))                       \
            {                                                           \
              (trans) = CHAR_TABLE_REF (XCAR (lookup_rest_), (c));      \
              if (CHARACTERP (trans))                                   \
                {                                                       \
                  (c) = XFASTINT (trans);                               \
                  (trans) = Qnil;                                       \
                }                                                       \
              else if (! NILP (trans))                                  \
                break;                                                  \
            }                                                           \
          lookup_rest_ = XCDR (lookup_rest_);                           \
        }                                                               \
  } while (0)
5672
5673
5674 static Lisp_Object
5675 get_translation (val, buf, buf_end, last_block, from_nchars, to_nchars)
5676      Lisp_Object val;
5677      int *buf, *buf_end;
5678      int last_block;
5679      int *from_nchars, *to_nchars;
5680 {
5681   /* VAL is TO or (([FROM-CHAR ...] .  TO) ...) where TO is TO-CHAR or
5682      [TO-CHAR ...].  */
5683   if (CONSP (val))
5684     {
5685       Lisp_Object from, tail;
5686       int i, len;
5687
5688       for (tail = val; CONSP (tail); tail = XCDR (tail))
5689         {
5690           val = XCAR (tail);
5691           from = XCAR (val);
5692           len = ASIZE (from);
5693           for (i = 0; i < len; i++)
5694             {
5695               if (buf + i == buf_end)
5696                 {
5697                   if (! last_block)
5698                     return Qt;
5699                   break;
5700                 }
5701               if (XINT (AREF (from, i)) != buf[i])
5702                 break;
5703             }
5704           if (i == len)
5705             {
5706               val = XCDR (val);
5707               *from_nchars = len;
5708               break;
5709             }
5710         }
5711       if (! CONSP (tail))
5712         return Qnil;
5713     }
5714   if (VECTORP (val))
5715     *buf = XINT (AREF (val, 0)), *to_nchars = ASIZE (val);
5716   else
5717     *buf = XINT (val);
5718   return val;
5719 }
5720
5721
5722 static int
5723 produce_chars (coding, translation_table, last_block)
5724      struct coding_system *coding;
5725      Lisp_Object translation_table;
5726      int last_block;
5727 {
5728   unsigned char *dst = coding->destination + coding->produced;
5729   unsigned char *dst_end = coding->destination + coding->dst_bytes;
5730   int produced;
5731   int produced_chars = 0;
5732   int carryover = 0;
5733
5734   if (! coding->chars_at_source)
5735     {
5736       /* Characters are in coding->charbuf.  */
5737       int *buf = coding->charbuf;
5738       int *buf_end = buf + coding->charbuf_used;
5739
5740       if (BUFFERP (coding->src_object)
5741           && EQ (coding->src_object, coding->dst_object))
5742         dst_end = ((unsigned char *) coding->source) + coding->consumed;
5743
5744       while (buf < buf_end)
5745         {
5746           int c = *buf, i;
5747
5748           if (c >= 0)
5749             {
5750               int from_nchars = 1, to_nchars = 1;
5751               Lisp_Object trans = Qnil;
5752
5753               LOOKUP_TRANSLATION_TABLE (translation_table, c, trans);
5754               if (! NILP (trans))
5755                 {
5756                   trans = get_translation (trans, buf, buf_end, last_block,
5757                                            &from_nchars, &to_nchars);
5758                   if (EQ (trans, Qt))
5759                     break;
5760                   c = *buf;
5761                 }
5762
5763               if (dst + MAX_MULTIBYTE_LENGTH * to_nchars > dst_end)
5764                 {
5765                   dst = alloc_destination (coding,
5766                                            buf_end - buf
5767                                            + MAX_MULTIBYTE_LENGTH * to_nchars,
5768                                            dst);
5769                   dst_end = coding->destination + coding->dst_bytes;
5770                 }
5771
5772               for (i = 0; i < to_nchars; i++)
5773                 {
5774                   if (i > 0)
5775                     c = XINT (AREF (trans, i));
5776                   if (coding->dst_multibyte
5777                       || ! CHAR_BYTE8_P (c))
5778                     CHAR_STRING_ADVANCE (c, dst);
5779                   else
5780                     *dst++ = CHAR_TO_BYTE8 (c);
5781                 }
5782               produced_chars += to_nchars;
5783               *buf++ = to_nchars;
5784               while (--from_nchars > 0)
5785                 *buf++ = 0;
5786             }
5787           else
5788             /* This is an annotation datum.  (-C) is the length.  */
5789             buf += -c;
5790         }
5791       carryover = buf_end - buf;
5792     }
5793   else
5794     {
5795       const unsigned char *src = coding->source;
5796       const unsigned char *src_end = src + coding->src_bytes;
5797       Lisp_Object eol_type;
5798
5799       eol_type = CODING_ID_EOL_TYPE (coding->id);
5800
5801       if (coding->src_multibyte != coding->dst_multibyte)
5802         {
5803           if (coding->src_multibyte)
5804             {
5805               int multibytep = 1;
5806               int consumed_chars;
5807
5808               while (1)
5809                 {
5810                   const unsigned char *src_base = src;
5811                   int c;
5812
5813                   ONE_MORE_BYTE (c);
5814                   if (c == '\r')
5815                     {
5816                       if (EQ (eol_type, Qdos))
5817                         {
5818                           if (src == src_end)
5819                             {
5820                               record_conversion_result
5821                                 (coding, CODING_RESULT_INSUFFICIENT_SRC);
5822                               goto no_more_source;
5823                             }
5824                           if (*src == '\n')
5825                             c = *src++;
5826                         }
5827                       else if (EQ (eol_type, Qmac))
5828                         c = '\n';
5829                     }
5830                   if (dst == dst_end)
5831                     {
5832                       coding->consumed = src - coding->source;
5833
5834                     if (EQ (coding->src_object, coding->dst_object))
5835                       dst_end = (unsigned char *) src;
5836                     if (dst == dst_end)
5837                       {
5838                         dst = alloc_destination (coding, src_end - src + 1,
5839                                                  dst);
5840                         dst_end = coding->destination + coding->dst_bytes;
5841                         coding_set_source (coding);
5842                         src = coding->source + coding->consumed;
5843                         src_end = coding->source + coding->src_bytes;
5844                       }
5845                     }
5846                   *dst++ = c;
5847                   produced_chars++;
5848                 }
5849             no_more_source:
5850               ;
5851             }
5852           else
5853             while (src < src_end)
5854               {
5855                 int multibytep = 1;
5856                 int c = *src++;
5857
5858                 if (c == '\r')
5859                   {
5860                     if (EQ (eol_type, Qdos))
5861                       {
5862                         if (src < src_end
5863                             && *src == '\n')
5864                           c = *src++;
5865                       }
5866                     else if (EQ (eol_type, Qmac))
5867                       c = '\n';
5868                   }
5869                 if (dst >= dst_end - 1)
5870                   {
5871                     coding->consumed = src - coding->source;
5872
5873                     if (EQ (coding->src_object, coding->dst_object))
5874                       dst_end = (unsigned char *) src;
5875                     if (dst >= dst_end - 1)
5876                       {
5877                         dst = alloc_destination (coding, src_end - src + 2,
5878                                                  dst);
5879                         dst_end = coding->destination + coding->dst_bytes;
5880                         coding_set_source (coding);
5881                         src = coding->source + coding->consumed;
5882                         src_end = coding->source + coding->src_bytes;
5883                       }
5884                   }
5885                 EMIT_ONE_BYTE (c);
5886               }
5887         }
5888       else
5889         {
5890           if (!EQ (coding->src_object, coding->dst_object))
5891             {
5892               int require = coding->src_bytes - coding->dst_bytes;
5893
5894               if (require > 0)
5895                 {
5896                   EMACS_INT offset = src - coding->source;
5897
5898                   dst = alloc_destination (coding, require, dst);
5899                   coding_set_source (coding);
5900                   src = coding->source + offset;
5901                   src_end = coding->source + coding->src_bytes;
5902                 }
5903             }
5904           produced_chars = coding->src_chars;
5905           while (src < src_end)
5906             {
5907               int c = *src++;
5908
5909               if (c == '\r')
5910                 {
5911                   if (EQ (eol_type, Qdos))
5912                     {
5913                       if (src < src_end
5914                           && *src == '\n')
5915                         c = *src++;
5916                       produced_chars--;
5917                     }
5918                   else if (EQ (eol_type, Qmac))
5919                     c = '\n';
5920                 }
5921               *dst++ = c;
5922             }
5923         }
5924       coding->consumed = coding->src_bytes;
5925       coding->consumed_char = coding->src_chars;
5926     }
5927
5928   produced = dst - (coding->destination + coding->produced);
5929   if (BUFFERP (coding->dst_object))
5930     insert_from_gap (produced_chars, produced);
5931   coding->produced += produced;
5932   coding->produced_char += produced_chars;
5933   return carryover;
5934 }
5935
5936 /* Compose text in CODING->object according to the annotation data at
5937    CHARBUF.  CHARBUF is an array:
5938      [ -LENGTH ANNOTATION_MASK FROM TO METHOD COMP_LEN [ COMPONENTS... ] ]
5939  */
5940
5941 static INLINE void
5942 produce_composition (coding, charbuf, pos)
5943      struct coding_system *coding;
5944      int *charbuf;
5945      EMACS_INT pos;
5946 {
5947   int len;
5948   EMACS_INT to;
5949   enum composition_method method;
5950   Lisp_Object components;
5951
5952   len = -charbuf[0];
5953   to = pos + charbuf[2];
5954   if (to <= pos)
5955     return;
5956   method = (enum composition_method) (charbuf[3]);
5957
5958   if (method == COMPOSITION_RELATIVE)
5959     components = Qnil;
5960   else if (method >= COMPOSITION_WITH_RULE
5961            && method <= COMPOSITION_WITH_RULE_ALTCHARS)
5962     {
5963       Lisp_Object args[MAX_COMPOSITION_COMPONENTS * 2 - 1];
5964       int i;
5965
5966       len -= 4;
5967       charbuf += 4;
5968       for (i = 0; i < len; i++)
5969         {
5970           args[i] = make_number (charbuf[i]);
5971           if (args[i] < 0)
5972             return;
5973         }
5974       components = (method == COMPOSITION_WITH_ALTCHARS
5975                     ? Fstring (len, args) : Fvector (len, args));
5976     }
5977   else
5978     return;
5979   compose_text (pos, to, components, Qnil, coding->dst_object);
5980 }
5981
5982
5983 /* Put `charset' property on text in CODING->object according to
5984    the annotation data at CHARBUF.  CHARBUF is an array:
5985      [ -LENGTH ANNOTATION_MASK NCHARS CHARSET-ID ]
5986  */
5987
5988 static INLINE void
5989 produce_charset (coding, charbuf, pos)
5990      struct coding_system *coding;
5991      int *charbuf;
5992      EMACS_INT pos;
5993 {
5994   EMACS_INT from = pos - charbuf[2];
5995   struct charset *charset = CHARSET_FROM_ID (charbuf[3]);
5996
5997   Fput_text_property (make_number (from), make_number (pos),
5998                       Qcharset, CHARSET_NAME (charset),
5999                       coding->dst_object);
6000 }
6001
6002
/* Number of elements first tried for the work area CODING->charbuf.  */
#define CHARBUF_SIZE 0x4000

/* Allocate the work area CODING->charbuf with alloca and record its
   size in CODING->charbuf_size.  CHARBUF_SIZE elements are tried
   first, then half as many on each failure, as long as the size
   stays above 1024.  If no attempt succeeds, record
   CODING_RESULT_INSUFFICIENT_MEM in CODING and return CODING->result
   from the function that uses this macro.  */

#define ALLOC_CONVERSION_WORK_AREA(coding)                              \
  do {                                                                  \
    int work_size_;                                                     \
                                                                        \
    (coding)->charbuf = NULL;                                           \
    for (work_size_ = CHARBUF_SIZE; work_size_ > 1024;                  \
         work_size_ >>= 1)                                              \
      {                                                                 \
        (coding)->charbuf                                               \
          = (int *) alloca (sizeof (int) * work_size_);                 \
        if ((coding)->charbuf)                                          \
          break;                                                        \
      }                                                                 \
    if (! (coding)->charbuf)                                            \
      {                                                                 \
        record_conversion_result ((coding),                             \
                                  CODING_RESULT_INSUFFICIENT_MEM);      \
        return (coding)->result;                                        \
      }                                                                 \
    (coding)->charbuf_size = work_size_;                                \
  } while (0)
6024
6025
6026 static void
6027 produce_annotation (coding, pos)
6028      struct coding_system *coding;
6029      EMACS_INT pos;
6030 {
6031   int *charbuf = coding->charbuf;
6032   int *charbuf_end = charbuf + coding->charbuf_used;
6033
6034   if (NILP (coding->dst_object))
6035     return;
6036
6037   while (charbuf < charbuf_end)
6038     {
6039       if (*charbuf >= 0)
6040         pos += *charbuf++;
6041       else
6042         {
6043           int len = -*charbuf;
6044           switch (charbuf[1])
6045             {
6046             case CODING_ANNOTATE_COMPOSITION_MASK:
6047               produce_composition (coding, charbuf, pos);
6048               break;
6049             case CODING_ANNOTATE_CHARSET_MASK:
6050               produce_charset (coding, charbuf, pos);
6051               break;
6052             default:
6053               abort ();
6054             }
6055           charbuf += len;
6056         }
6057     }
6058 }
6059
6060 /* Decode the data at CODING->src_object into CODING->dst_object.
6061    CODING->src_object is a buffer, a string, or nil.
6062    CODING->dst_object is a buffer.
6063
6064    If CODING->src_object is a buffer, it must be the current buffer.
6065    In this case, if CODING->src_pos is positive, it is a position of
6066    the source text in the buffer, otherwise, the source text is in the
6067    gap area of the buffer, and CODING->src_pos specifies the offset of
6068    the text from GPT (which must be the same as PT).  If this is the
6069    same buffer as CODING->dst_object, CODING->src_pos must be
6070    negative.
6071
6072    If CODING->src_object is a string, CODING->src_pos in an index to
6073    that string.
6074
6075    If CODING->src_object is nil, CODING->source must already point to
6076    the non-relocatable memory area.  In this case, CODING->src_pos is
6077    an offset from CODING->source.
6078
6079    The decoded data is inserted at the current point of the buffer
6080    CODING->dst_object.
6081 */
6082
6083 static int
6084 decode_coding (coding)
6085      struct coding_system *coding;
6086 {
6087   Lisp_Object attrs;
6088   Lisp_Object undo_list;
6089   Lisp_Object translation_table;
6090   int carryover;
6091   int i;
6092
6093   if (BUFFERP (coding->src_object)
6094       && coding->src_pos > 0
6095       && coding->src_pos < GPT
6096       && coding->src_pos + coding->src_chars > GPT)
6097     move_gap_both (coding->src_pos, coding->src_pos_byte);
6098
6099   undo_list = Qt;
6100   if (BUFFERP (coding->dst_object))
6101     {
6102       if (current_buffer != XBUFFER (coding->dst_object))
6103         set_buffer_internal (XBUFFER (coding->dst_object));
6104       if (GPT != PT)
6105         move_gap_both (PT, PT_BYTE);
6106       undo_list = current_buffer->undo_list;
6107       current_buffer->undo_list = Qt;
6108     }
6109
6110   coding->consumed = coding->consumed_char = 0;
6111   coding->produced = coding->produced_char = 0;
6112   coding->chars_at_source = 0;
6113   record_conversion_result (coding, CODING_RESULT_SUCCESS);
6114   coding->errors = 0;
6115
6116   ALLOC_CONVERSION_WORK_AREA (coding);
6117
6118   attrs = CODING_ID_ATTRS (coding->id);
6119   translation_table = get_translation_table (attrs, 0, NULL);
6120
6121   carryover = 0;
6122   do
6123     {
6124       EMACS_INT pos = coding->dst_pos + coding->produced_char;
6125
6126       coding_set_source (coding);
6127       coding->annotated = 0;
6128       coding->charbuf_used = carryover;
6129       (*(coding->decoder)) (coding);
6130       coding_set_destination (coding);
6131       carryover = produce_chars (coding, translation_table, 0);
6132       if (coding->annotated)
6133         produce_annotation (coding, pos);
6134       for (i = 0; i < carryover; i++)
6135         coding->charbuf[i]
6136           = coding->charbuf[coding->charbuf_used - carryover + i];
6137     }
6138   while (coding->consumed < coding->src_bytes
6139          && ! coding->result);
6140
6141   if (carryover > 0)
6142     {
6143       coding_set_destination (coding);
6144       coding->charbuf_used = carryover;
6145       produce_chars (coding, translation_table, 1);
6146     }
6147
6148   coding->carryover_bytes = 0;
6149   if (coding->consumed < coding->src_bytes)
6150     {
6151       int nbytes = coding->src_bytes - coding->consumed;
6152       const unsigned char *src;
6153
6154       coding_set_source (coding);
6155       coding_set_destination (coding);
6156       src = coding->source + coding->consumed;
6157
6158       if (coding->mode & CODING_MODE_LAST_BLOCK)
6159         {
6160           /* Flush out unprocessed data as binary chars.  We are sure
6161              that the number of data is less than the size of
6162              coding->charbuf.  */
6163           coding->charbuf_used = 0;
6164           while (nbytes-- > 0)
6165             {
6166               int c = *src++;
6167
6168               coding->charbuf[coding->charbuf_used++] = (c & 0x80 ? - c : c);
6169             }
6170           produce_chars (coding, Qnil, 1);
6171         }
6172       else
6173         {
6174           /* Record unprocessed bytes in coding->carryover.  We are
6175              sure that the number of data is less than the size of
6176              coding->carryover.  */
6177           unsigned char *p = coding->carryover;
6178
6179           coding->carryover_bytes = nbytes;
6180           while (nbytes-- > 0)
6181             *p++ = *src++;
6182         }
6183       coding->consumed = coding->src_bytes;
6184     }
6185
6186   if (BUFFERP (coding->dst_object))
6187     {
6188       current_buffer->undo_list = undo_list;
6189       record_insert (coding->dst_pos, coding->produced_char);
6190     }
6191   if (! EQ (CODING_ID_EOL_TYPE (coding->id), Qunix))
6192     decode_eol (coding);
6193   return coding->result;
6194 }
6195
6196
6197 /* Extract an annotation datum from a composition starting at POS and
6198    ending before LIMIT of CODING->src_object (buffer or string), store
6199    the data in BUF, set *STOP to a starting position of the next
6200    composition (if any) or to LIMIT, and return the address of the
6201    next element of BUF.
6202
6203    If such an annotation is not found, set *STOP to a starting
6204    position of a composition after POS (if any) or to LIMIT, and
6205    return BUF.  */
6206
6207 static INLINE int *
6208 handle_composition_annotation (pos, limit, coding, buf, stop)
6209      EMACS_INT pos, limit;
6210      struct coding_system *coding;
6211      int *buf;
6212      EMACS_INT *stop;
6213 {
6214   EMACS_INT start, end;
6215   Lisp_Object prop;
6216
6217   if (! find_composition (pos, limit, &start, &end, &prop, coding->src_object)
6218       || end > limit)
6219     *stop = limit;
6220   else if (start > pos)
6221     *stop = start;
6222   else
6223     {
6224       if (start == pos)
6225         {
6226           /* We found a composition.  Store the corresponding
6227              annotation data in BUF.  */
6228           int *head = buf;
6229           enum composition_method method = COMPOSITION_METHOD (prop);
6230           int nchars = COMPOSITION_LENGTH (prop);
6231
6232           ADD_COMPOSITION_DATA (buf, nchars, method);
6233           if (method != COMPOSITION_RELATIVE)
6234             {
6235               Lisp_Object components;
6236               int len, i, i_byte;
6237
6238               components = COMPOSITION_COMPONENTS (prop);
6239               if (VECTORP (components))
6240                 {
6241                   len = XVECTOR (components)->size;
6242                   for (i = 0; i < len; i++)
6243                     *buf++ = XINT (AREF (components, i));
6244                 }
6245               else if (STRINGP (components))
6246                 {
6247                   len = SCHARS (components);
6248                   i = i_byte = 0;
6249                   while (i < len)
6250                     {
6251                       FETCH_STRING_CHAR_ADVANCE (*buf, components, i, i_byte);
6252                       buf++;
6253                     }
6254                 }
6255               else if (INTEGERP (components))
6256                 {
6257                   len = 1;
6258                   *buf++ = XINT (components);
6259                 }
6260               else if (CONSP (components))
6261                 {
6262                   for (len = 0; CONSP (components);
6263                        len++, components = XCDR (components))
6264                     *buf++ = XINT (XCAR (components));
6265                 }
6266               else
6267                 abort ();
6268               *head -= len;
6269             }
6270         }
6271
6272       if (find_composition (end, limit, &start, &end, &prop,
6273                             coding->src_object)
6274           && end <= limit)
6275         *stop = start;
6276       else
6277         *stop = limit;
6278     }
6279   return buf;
6280 }
6281
6282
6283 /* Emit a charset annotation for position POS of CODING->src_object (a
6284    buffer or a string) through BUF via ADD_CHARSET_DATA.  The datum is
6285    the ID of the charset named by the `charset' text property at POS,
6286    or -1 when that property is nil or does not name a charset.
6287
6288    Set *STOP to the next position, bounded by LIMIT, where the value
6289    of the `charset' property changes.  Return BUF as ADD_CHARSET_DATA
6290    leaves it (presumably just past the stored annotation).  */
6291
6292 static INLINE int *
6293 handle_charset_annotation (pos, limit, coding, buf, stop)
6294      EMACS_INT pos, limit;
6295      struct coding_system *coding;
6296      int *buf;
6297      EMACS_INT *stop;
6298 {
6299   Lisp_Object charset_prop, change_pos;
6300   int charset_id = -1;
6301
6302   charset_prop = Fget_text_property (make_number (pos), Qcharset,
6303                                      coding->src_object);
6304   if (! NILP (charset_prop) && CHARSETP (charset_prop))
6305     charset_id = XINT (CHARSET_SYMBOL_ID (charset_prop));
6306   ADD_CHARSET_DATA (buf, 0, charset_id);
6307
6308   change_pos = Fnext_single_property_change (make_number (pos), Qcharset,
6309                                              coding->src_object,
6310                                              make_number (limit));
6311   *stop = XINT (change_pos);
6312   return buf;
6313 }
6314
6315
     /* Fetch characters from CODING->source, starting CODING->consumed
        bytes in, and store them as ints in CODING->charbuf for the
        encoder.

        TRANSLATION_TABLE, when non-nil, is looked up for every character;
        MAX_LOOKUP is the number of ints reserved for the lookahead window
        handed to get_translation.

        The loop ends when the character buffer is full (minus the slack
        reserved below), when the source position reaches the end of the
        source text, or when a translation result is t or would not fit.
        On return CODING->consumed, CODING->consumed_char and
        CODING->charbuf_used describe the progress made, and
        CODING->chars_at_source is cleared.  */
6316 static void
6317 consume_chars (coding, translation_table, max_lookup)
6318      struct coding_system *coding;
6319      Lisp_Object translation_table;
6320      int max_lookup;
6321 {
6322   int *buf = coding->charbuf;
6323   int *buf_end = coding->charbuf + coding->charbuf_size;
6324   const unsigned char *src = coding->source + coding->consumed;
6325   const unsigned char *src_end = coding->source + coding->src_bytes;
6326   EMACS_INT pos = coding->src_pos + coding->consumed_char;
6327   EMACS_INT end_pos = coding->src_pos + coding->src_chars;
6328   int multibytep = coding->src_multibyte;
6329   Lisp_Object eol_type;
6330   int c;
6331   EMACS_INT stop, stop_composition, stop_charset;
6332   int *lookup_buf = NULL;
6333
       /* The lookahead window is only needed when translating.  */
6334   if (! NILP (translation_table))
6335     lookup_buf = alloca (sizeof (int) * max_lookup);
6336
       /* A vector-valued EOL type is handled like Qunix, i.e. no EOL
          conversion is done (presumably the EOL format is still
          undecided in that case -- confirm against CODING_ID_EOL_TYPE).  */
6337   eol_type = CODING_ID_EOL_TYPE (coding->id);
6338   if (VECTORP (eol_type))
6339     eol_type = Qunix;
6340
6341   /* Note: composition handling is not yet implemented.  */
6342   coding->common_flags &= ~CODING_ANNOTATE_COMPOSITION_MASK;
6343
       /* STOP is the nearest position at which an annotation handler has
          to run.  Without a source object there is nothing to annotate.
          Because the composition flag was just cleared, the composition
          test below is always false here.  */
6344   if (NILP (coding->src_object))
6345     stop = stop_composition = stop_charset = end_pos;
6346   else
6347     {
6348       if (coding->common_flags & CODING_ANNOTATE_COMPOSITION_MASK)
6349         stop = stop_composition = pos;
6350       else
6351         stop = stop_composition = end_pos;
6352       if (coding->common_flags & CODING_ANNOTATE_CHARSET_MASK)
6353         stop = stop_charset = pos;
6354       else
6355         stop_charset = end_pos;
6356     }
6357
6358   /* Compensate for CRLF and conversion.  */
6359   buf_end -= 1 + MAX_ANNOTATION_LENGTH;
6360   while (buf < buf_end)
6361     {
6362       Lisp_Object trans;
6363
6364       if (pos == stop)
6365         {
6366           if (pos == end_pos)
6367             break;
               /* Run whichever handlers are due here; each one updates its
                  own stop position, and STOP becomes the nearer of them.  */
6368           if (pos == stop_composition)
6369             buf = handle_composition_annotation (pos, end_pos, coding,
6370                                                  buf, &stop_composition);
6371           if (pos == stop_charset)
6372             buf = handle_charset_annotation (pos, end_pos, coding,
6373                                              buf, &stop_charset);
6374           stop = (stop_composition < stop_charset
6375                   ? stop_composition : stop_charset);
6376         }
6377
           /* Fetch the next character.  In a unibyte source, unless the
              coding is for unibyte text, a byte sequence accepted by
              MULTIBYTE_LENGTH is read as one character and POS advances
              by its byte length; otherwise a single byte is taken.  */
6378       if (! multibytep)
6379         {
6380           EMACS_INT bytes;
6381
6382           if (! CODING_FOR_UNIBYTE (coding)
6383               && (bytes = MULTIBYTE_LENGTH (src, src_end)) > 0)
6384             c = STRING_CHAR_ADVANCE (src), pos += bytes;
6385           else
6386             c = *src++, pos++;
6387         }
6388       else
6389         c = STRING_CHAR_ADVANCE (src), pos++;
           /* Under selective display a CR is treated as a newline.  */
6390       if ((c == '\r') && (coding->mode & CODING_MODE_SELECTIVE_DISPLAY))
6391         c = '\n';
           /* EOL conversion: for Qdos a CR is stored in front of the LF
              (the slot reserved above); for any other non-unix type the
              LF itself becomes CR.  */
6392       if (! EQ (eol_type, Qunix))
6393         {
6394           if (c == '\n')
6395             {
6396               if (EQ (eol_type, Qdos))
6397                 *buf++ = '\r';
6398               else
6399                 c = '\r';
6400             }
6401         }
6402
6403       trans = Qnil;
6404       LOOKUP_TRANSLATION_TABLE (translation_table, c, trans);
6405       if (NILP (trans))
6406         *buf++ = c;
6407       else
6408         {
6409           int from_nchars = 1, to_nchars = 1;
6410           int *lookup_buf_end;
6411           const unsigned char *p = src;
6412           int i;
6413
               /* Build the lookahead window: C followed by up to
                  MAX_LOOKUP - 1 further source characters, read through P
                  so that SRC itself is not advanced yet.  */
6414           lookup_buf[0] = c;
6415           for (i = 1; i < max_lookup && p < src_end; i++)
6416             lookup_buf[i] = STRING_CHAR_ADVANCE (p);
6417           lookup_buf_end = lookup_buf + i;
6418           trans = get_translation (trans, lookup_buf, lookup_buf_end, 1,
6419                                    &from_nchars, &to_nchars);
               /* Give up for this round if get_translation answered t or
                  the result would overrun the character buffer.  */
6420           if (EQ (trans, Qt)
6421               || buf + to_nchars > buf_end)
6422             break;
               /* The first result character is taken from lookup_buf[0]
                  (presumably rewritten by get_translation -- TODO confirm);
                  any further ones come from the vector TRANS.  */
6423           *buf++ = *lookup_buf;
6424           for (i = 1; i < to_nchars; i++)
6425             *buf++ = XINT (AREF (trans, i));
               /* Skip the extra source characters the translation used.  */
6426           for (i = 1; i < from_nchars; i++, pos++)
6427             src += MULTIBYTE_LENGTH_NO_CHECK (src);
6428         }
6429     }
6430
6431   coding->consumed = src - coding->source;
6432   coding->consumed_char = pos - coding->src_pos;
6433   coding->charbuf_used = buf - coding->charbuf;
6434   coding->chars_at_source = 0;
6435 }
6436
6437
6438 /* Encode the text at CODING->src_object into CODING->dst_object.
6439    CODING->src_object is a buffer or a string.
6440    CODING->dst_object is a buffer or nil.
6441
6442    If CODING->src_object is a buffer, it must be the current buffer.
6443    In this case, if CODING->src_pos is positive, it is the position of
6444    the source text in the buffer; otherwise the source text is in the
6445    gap area of the buffer, and coding->src_pos specifies the offset of
6446    the text from GPT (which must be the same as PT).  If this is the
6447    same buffer as CODING->dst_object, CODING->src_pos must be
6448    negative and CODING should not have `pre-write-conversion'.
6449
6450    If CODING->src_object is a string, CODING should not have
6451    `pre-write-conversion'.
6452
6453    If CODING->dst_object is a buffer, the encoded data is inserted at
6454    the current point of that buffer.
6455
6456    If CODING->dst_object is nil, the encoded data is placed at the
6457    memory area specified by CODING->destination.
     
        The return value is CODING->result.  */
6458
6459 static int
6460 encode_coding (coding)
6461      struct coding_system *coding;
6462 {
6463   Lisp_Object attrs;
6464   Lisp_Object translation_table;
6465   int max_lookup;
6466
       /* Second argument 1 presumably selects the encoding-side table;
          MAX_LOOKUP is filled in for consume_chars.  */
6467   attrs = CODING_ID_ATTRS (coding->id);
6468   translation_table = get_translation_table (attrs, 1, &max_lookup);
6469
       /* A buffer destination becomes the current buffer, and its
          multibyteness decides CODING->dst_multibyte.  */
6470   if (BUFFERP (coding->dst_object))
6471     {
6472       set_buffer_internal (XBUFFER (coding->dst_object));
6473       coding->dst_multibyte
6474         = ! NILP (current_buffer->enable_multibyte_characters);
6475     }
6476
       /* Start from a clean progress/result state.  */
6477   coding->consumed = coding->consumed_char = 0;
6478   coding->produced = coding->produced_char = 0;
6479   record_conversion_result (coding, CODING_RESULT_SUCCESS);
6480   coding->errors = 0;
6481
6482   ALLOC_CONVERSION_WORK_AREA (coding);
6483
       /* Alternate between filling the character buffer and running the
          encoder until every source character has been consumed.  The
          source and destination pointers are refreshed on each round.  */
6484   do {
6485     coding_set_source (coding);
6486     consume_chars (coding, translation_table, max_lookup);
6487     coding_set_destination (coding);
6488     (*(coding->encoder)) (coding);
6489   } while (coding->consumed_char < coding->src_chars);
6490
       /* For a buffer destination, insert_from_gap is given the counts
          of characters and bytes produced.  */
6491   if (BUFFERP (coding->dst_object))
6492     insert_from_gap (coding->produced_char, coding->produced);
6493
6494   return (coding->result);
6495 }
6496
6497
6498 /* Name (or base name) of work buffer for code conversion.  */
6499 static Lisp_Object Vcode_conversion_workbuf_name;
6500
6501 /* A working buffer used by the top level conversion.  Once it is
6502    created, it is never destroyed.  It has the name
6503    Vcode_conversion_workbuf_name.  The other working buffers are
6504    destroyed after the use is finished, and their names are modified
6505    versions of Vcode_conversion_workbuf_name.  */
6506 static Lisp_Object Vcode_conversion_reused_workbuf;
6507
6508 /* Nonzero iff Vcode_conversion_reused_workbuf is already in use.
        make_conversion_work_buffer increments it on every call, and
        code_conversion_restore resets it to 0 when the reused buffer is
        released.  */
6509 static int reused_workbuf_in_use;
6510
6511
6512 /* Return a working buffer for code conversion.  MULTIBYTE specifies
6513    the multibyteness of the returned buffer.  The buffer is erased and
        its undo list is set to t before it is returned; the current buffer
        is left unchanged.  */
6514
6515 static Lisp_Object
6516 make_conversion_work_buffer (multibyte)
6517      int multibyte;
6518 {
6519   Lisp_Object name, workbuf;
6520   struct buffer *current;
6521
       /* The counter is bumped on every call.  If it was already nonzero
          the shared buffer is busy, so a buffer with a freshly generated
          name is used instead.  */
6522   if (reused_workbuf_in_use++)
6523     {
6524       name = Fgenerate_new_buffer_name (Vcode_conversion_workbuf_name, Qnil);
6525       workbuf = Fget_buffer_create (name);
6526     }
6527   else
6528     {
6529       name = Vcode_conversion_workbuf_name;
6530       workbuf = Fget_buffer_create (name);
           /* Remember the shared buffer the first time it is obtained.  */
6531       if (NILP (Vcode_conversion_reused_workbuf))
6532         Vcode_conversion_reused_workbuf = workbuf;
6533     }
       /* Prepare the buffer with it temporarily current, then switch
          back to the caller's buffer.  */
6534   current = current_buffer;
6535   set_buffer_internal (XBUFFER (workbuf));
6536   Ferase_buffer ();
6537   current_buffer->undo_list = Qt;
6538   current_buffer->enable_multibyte_characters = multibyte ? Qt : Qnil;
6539   set_buffer_internal (current);
6540   return workbuf;
6541 }
6542
6543
     /* Unwind handler installed by code_conversion_save.  ARG is a cons
        (BUFFER . WORKBUF).  Release WORKBUF when there is one: the shared
        work buffer is only marked as free again, any other live work
        buffer is killed.  Then make BUFFER current again.  Always return
        Qnil.  */
6544 static Lisp_Object
6545 code_conversion_restore (arg)
6546      Lisp_Object arg;
6547 {
6548   Lisp_Object saved_buffer = XCAR (arg);
6549   Lisp_Object work_buffer = XCDR (arg);
6550
6551   if (NILP (work_buffer))
6552     ;                           /* No work buffer was requested.  */
6553   else if (EQ (work_buffer, Vcode_conversion_reused_workbuf))
6554     reused_workbuf_in_use = 0;
6555   else if (! NILP (Fbuffer_live_p (work_buffer)))
6556     Fkill_buffer (work_buffer);
6557
6558   set_buffer_internal (XBUFFER (saved_buffer));
6559   return Qnil;
6560 }
6562
     /* Arrange for the current buffer to be restored when the specpdl is
        unwound.  If WITH_WORK_BUF is nonzero, also obtain a conversion
        work buffer whose multibyteness is MULTIBYTE and have the unwind
        handler release it.  Return the work buffer, or Qnil if none was
        requested.  */
6563 Lisp_Object
6564 code_conversion_save (with_work_buf, multibyte)
6565      int with_work_buf, multibyte;
6566 {
6567   Lisp_Object work_buffer, unwind_data;
6568
6569   work_buffer = (with_work_buf
6570                  ? make_conversion_work_buffer (multibyte) : Qnil);
6571   unwind_data = Fcons (Fcurrent_buffer (), work_buffer);
6572   record_unwind_protect (code_conversion_restore, unwind_data);
6573   return work_buffer;
6574 }
6575
     /* Decode CHARS characters (BYTES bytes) of the current buffer by
        CODING, with the current buffer as both source and destination.
        The source position is given as the negative offsets -CHARS and
        -BYTES (presumably meaning the text sits in the gap, as described
        for encode_coding), and the destination is PT.  If the coding
        system has a post-read-conversion function, call it afterwards and
        fold the resulting change of buffer size into
        CODING->produced_char and CODING->produced.  Return
        CODING->result.  */
6576 int
6577 decode_coding_gap (coding, chars, bytes)
6578      struct coding_system *coding;
6579      EMACS_INT chars, bytes;
6580 {
6581   int count = specpdl_ptr - specpdl;
6582   Lisp_Object attrs;
6583
       /* No work buffer; only the current buffer is to be restored.  */
6584   code_conversion_save (0, 0);
6585
6586   coding->src_object = Fcurrent_buffer ();
6587   coding->src_chars = chars;
6588   coding->src_bytes = bytes;
6589   coding->src_pos = -chars;
6590   coding->src_pos_byte = -bytes;
       /* More bytes than characters means the source is multibyte.  */
6591   coding->src_multibyte = chars < bytes;
6592   coding->dst_object = coding->src_object;
6593   coding->dst_pos = PT;
6594   coding->dst_pos_byte = PT_BYTE;
6595   coding->dst_multibyte = ! NILP (current_buffer->enable_multibyte_characters);
6596   coding->mode |= CODING_MODE_LAST_BLOCK;
6597
6598   if (CODING_REQUIRE_DETECTION (coding))
6599     detect_coding (coding);
6600
6601   decode_coding (coding);
6602
       /* ATTRS is fetched only now because detection may have changed
          CODING->id.  */
6603   attrs = CODING_ID_ATTRS (coding->id);
6604   if (! NILP (CODING_ATTR_POST_READ (attrs)))
6605     {
6606       EMACS_INT prev_Z = Z, prev_Z_BYTE = Z_BYTE;
6607       Lisp_Object val;
6608
           /* The function is called with point at the start of the
              decoded text and the number of decoded characters as its
              argument; its value must be a non-negative integer.  */
6609       TEMP_SET_PT_BOTH (coding->dst_pos, coding->dst_pos_byte);
6610       val = call1 (CODING_ATTR_POST_READ (attrs),
6611                    make_number (coding->produced_char));
6612       CHECK_NATNUM (val);
6613       coding->produced_char += Z - prev_Z;
6614       coding->produced += Z_BYTE - prev_Z_BYTE;
6615     }
6616
6617   unbind_to (count, Qnil);
6618   return coding->result;
6619 }
6620
     /* Encode CHARS characters (BYTES bytes) of the current buffer by
        CODING, with the current buffer as both source and destination.
        The source position is given as the negative offsets -CHARS and
        -BYTES, which encode_coding documents as "text in the gap area";
        the destination is PT.  Return CODING->result.  */
6621 int
6622 encode_coding_gap (coding, chars, bytes)
6623      struct coding_system *coding;
6624      EMACS_INT chars, bytes;
6625 {
6626   int count = specpdl_ptr - specpdl;
6627
       /* No work buffer; only the current buffer is to be restored.  */
6628   code_conversion_save (0, 0);
6629
6630   coding->src_object = Fcurrent_buffer ();
6631   coding->src_chars = chars;
6632   coding->src_bytes = bytes;
6633   coding->src_pos = -chars;
6634   coding->src_pos_byte = -bytes;
       /* More bytes than characters means the source is multibyte.  */
6635   coding->src_multibyte = chars < bytes;
6636   coding->dst_object = coding->src_object;
6637   coding->dst_pos = PT;
6638   coding->dst_pos_byte = PT_BYTE;
6639
       /* encode_coding sets CODING->dst_multibyte from the buffer.  */
6640   encode_coding (coding);
6641
6642   unbind_to (count, Qnil);
6643   return coding->result;
6644 }
6645
6646
6647 /* Decode the text in the range FROM/FROM_BYTE and TO/TO_BYTE in
6648    SRC_OBJECT into DST_OBJECT by coding context CODING.
6649
6650    SRC_OBJECT is a buffer, a string, or Qnil.
6651
6652    If it is a buffer, the text is at point of the buffer.  FROM and TO
6653    are positions in the buffer.
6654
6655    If it is a string, the text is at the beginning of the string.
6656    FROM and TO are indices to the string.
6657
6658    If it is nil, the text is at coding->source.  FROM and TO are
6659    indices to coding->source.
6660
6661    DST_OBJECT is a buffer, Qt, or Qnil.
6662
6663    If it is a buffer, the decoded text is inserted at point of the
6664    buffer.  If the buffer is the same as SRC_OBJECT, the source text
6665    is deleted.
6666
6667    If it is Qt, a string is made from the decoded text, and
6668    set in CODING->dst_object.
6669
6670    If it is Qnil, the decoded text is stored at CODING->destination.
6671    The caller must allocate CODING->dst_bytes bytes at
6672    CODING->destination by xmalloc.  If the decoded text is longer than
6673    CODING->dst_bytes, CODING->destination is relocated by xrealloc.
     
        When the coding system has a post-read-conversion function and
        DST_OBJECT is Qnil, the text is first decoded into a work buffer,
        post-processed there, and then copied to CODING->destination.
     
        The sizes of the result are left in CODING->produced_char and
        CODING->produced.
6674  */
6675
6676 void
6677 decode_coding_object (coding, src_object, from, from_byte, to, to_byte,
6678                       dst_object)
6679      struct coding_system *coding;
6680      Lisp_Object src_object;
6681      EMACS_INT from, from_byte, to, to_byte;
6682      Lisp_Object dst_object;
6683 {
6684   int count = specpdl_ptr - specpdl;
6685   unsigned char *destination = NULL;
6686   EMACS_INT dst_bytes = 0;
6687   EMACS_INT chars = to - from;
6688   EMACS_INT bytes = to_byte - from_byte;
6689   Lisp_Object attrs;
       /* Buffer positions are EMACS_INT; -1 means "point was not moved".  */
6691   EMACS_INT saved_pt = -1, saved_pt_byte = -1;
6692
       /* Remember the caller's memory block now.  While a work buffer is
          the destination, CODING->destination presumably gets redirected
          into that buffer, so the original pointer must be kept here.  */
6695   if (NILP (dst_object))
6696     {
6697       destination = coding->destination;
6698       dst_bytes = coding->dst_bytes;
6699     }
6700
6701   coding->src_object = src_object;
6702   coding->src_chars = chars;
6703   coding->src_bytes = bytes;
       /* More bytes than characters means the source is multibyte.  */
6704   coding->src_multibyte = chars < bytes;
6705
6706   if (STRINGP (src_object))
6707     {
6708       coding->src_pos = from;
6709       coding->src_pos_byte = from_byte;
6710     }
6711   else if (BUFFERP (src_object))
6712     {
6713       set_buffer_internal (XBUFFER (src_object));
6714       if (from != GPT)
6715         move_gap_both (from, from_byte);
6716       if (EQ (src_object, dst_object))
6717         {
               /* Decoding in place: delete the source text and describe
                  it by negative offsets, the convention for text that is
                  no longer part of the buffer proper.  */
6718           saved_pt = PT, saved_pt_byte = PT_BYTE;
6719           TEMP_SET_PT_BOTH (from, from_byte);
6720           del_range_both (from, from_byte, to, to_byte, 1);
6721           coding->src_pos = -chars;
6722           coding->src_pos_byte = -bytes;
6723         }
6724       else
6725         {
6726           coding->src_pos = from;
6727           coding->src_pos_byte = from_byte;
6728         }
6729     }
6730
       /* Detection may change CODING->id, so fetch ATTRS afterwards.  */
6731   if (CODING_REQUIRE_DETECTION (coding))
6732     detect_coding (coding);
6733   attrs = CODING_ID_ATTRS (coding->id);
6734
       /* Choose the destination.  A multibyte work buffer is needed to
          build a string, and also when the result must be post-processed
          before it is copied to the caller's memory.  */
6735   if (EQ (dst_object, Qt)
6736       || (! NILP (CODING_ATTR_POST_READ (attrs))
6737           && NILP (dst_object)))
6738     {
6739       coding->dst_object = code_conversion_save (1, 1);
6740       coding->dst_pos = BEG;
6741       coding->dst_pos_byte = BEG_BYTE;
6742       coding->dst_multibyte = 1;
6743     }
6744   else if (BUFFERP (dst_object))
6745     {
6746       code_conversion_save (0, 0);
6747       coding->dst_object = dst_object;
6748       coding->dst_pos = BUF_PT (XBUFFER (dst_object));
6749       coding->dst_pos_byte = BUF_PT_BYTE (XBUFFER (dst_object));
6750       coding->dst_multibyte
6751         = ! NILP (XBUFFER (dst_object)->enable_multibyte_characters);
6752     }
6753   else
6754     {
6755       code_conversion_save (0, 0);
6756       coding->dst_object = Qnil;
6757       coding->dst_multibyte = 1;
6758     }
6759
6760   decode_coding (coding);
6761
6762   if (BUFFERP (coding->dst_object))
6763     set_buffer_internal (XBUFFER (coding->dst_object));
6764
6765   if (! NILP (CODING_ATTR_POST_READ (attrs)))
6766     {
6767       struct gcpro gcpro1, gcpro2;
6768       EMACS_INT prev_Z = Z, prev_Z_BYTE = Z_BYTE;
6769       Lisp_Object val;
6770
           /* Call the post-read function with point at the start of the
              decoded text, then account for any change of buffer size.  */
6771       TEMP_SET_PT_BOTH (coding->dst_pos, coding->dst_pos_byte);
6772       GCPRO2 (coding->src_object, coding->dst_object);
6773       val = call1 (CODING_ATTR_POST_READ (attrs),
6774                    make_number (coding->produced_char));
6775       UNGCPRO;
6776       CHECK_NATNUM (val);
6777       coding->produced_char += Z - prev_Z;
6778       coding->produced += Z_BYTE - prev_Z_BYTE;
6779     }
6780
6781   if (EQ (dst_object, Qt))
6782     {
6783       coding->dst_object = Fbuffer_string ();
6784     }
6785   else if (NILP (dst_object) && BUFFERP (coding->dst_object))
6786     {
           /* The text was decoded into a work buffer; hand it over in the
              caller's memory block.  The block is grown only when it is
              too small, but the text is copied and CODING->destination is
              restored in every case.  Otherwise a sufficiently large
              block would be left unfilled, and CODING->destination could
              keep referring to the work buffer, which goes away at
              unbind_to.  */
6787       set_buffer_internal (XBUFFER (coding->dst_object));
6788       if (dst_bytes < coding->produced)
6789         {
               unsigned char *grown
6791             = (unsigned char *) xrealloc (destination, coding->produced);
     
6792           if (! grown)
6793             {
6794               record_conversion_result (coding,
6795                                         CODING_RESULT_INSUFFICIENT_DST);
6796               unbind_to (count, Qnil);
6797               return;
6798             }
               destination = grown;
             }
           if (coding->produced > 0)
             {
               /* The bytes to copy must be contiguous, so push the gap
                  out of the way if it lies inside them.  */
6799           if (BEGV < GPT && GPT < BEGV + coding->produced_char)
6800             move_gap_both (BEGV, BEGV_BYTE);
6801           bcopy (BEGV_ADDR, destination, coding->produced);
6803         }
6802       coding->destination = destination;
6804     }
6805
6806   if (saved_pt >= 0)
6807     {
6808       /* This is the case of:
6809          (BUFFERP (src_object) && EQ (src_object, dst_object))
6810          As we have moved PT while replacing the original buffer
6811          contents, we must recover it now.  */
6812       set_buffer_internal (XBUFFER (src_object));
6813       if (saved_pt < from)
6814         TEMP_SET_PT_BOTH (saved_pt, saved_pt_byte);
6815       else if (saved_pt < from + chars)
6816         TEMP_SET_PT_BOTH (from, from_byte);
6817       else if (! NILP (current_buffer->enable_multibyte_characters))
6818         TEMP_SET_PT_BOTH (saved_pt + (coding->produced_char - chars),
6819                           saved_pt_byte + (coding->produced - bytes));
6820       else
6821         TEMP_SET_PT_BOTH (saved_pt + (coding->produced - bytes),
6822                           saved_pt_byte + (coding->produced - bytes));
6823     }
6824
6825   unbind_to (count, coding->dst_object);
6826 }
6827
6828
     /* Encode the text in the range FROM/FROM_BYTE and TO/TO_BYTE in
        SRC_OBJECT into DST_OBJECT by coding context CODING.
     
        SRC_OBJECT is a buffer, a string, or Qnil; for Qnil the text is
        taken from CODING->source.  If the coding system has a
        pre-write-conversion function, the text is first copied to a work
        buffer, the function is called there with the bounds of that
        buffer, and the resulting contents are encoded instead.
     
        DST_OBJECT is a buffer, Qt, or anything else.  For a buffer the
        encoded text is inserted into it (at FROM if it is also
        SRC_OBJECT, in which case the source text is deleted; otherwise at
        its point).  For Qt a unibyte string is made from the encoded
        bytes and set in CODING->dst_object.  Otherwise the encoded bytes
        are left at CODING->destination.  */
6829 void
6830 encode_coding_object (coding, src_object, from, from_byte, to, to_byte,
6831                       dst_object)
6832      struct coding_system *coding;
6833      Lisp_Object src_object;
6834      EMACS_INT from, from_byte, to, to_byte;
6835      Lisp_Object dst_object;
6836 {
6837   int count = specpdl_ptr - specpdl;
6838   EMACS_INT chars = to - from;
6839   EMACS_INT bytes = to_byte - from_byte;
6840   Lisp_Object attrs;
       /* Buffer positions are EMACS_INT; -1 means "point was not saved".  */
6842   EMACS_INT saved_pt = -1, saved_pt_byte = -1;
6843
6846   coding->src_object = src_object;
6847   coding->src_chars = chars;
6848   coding->src_bytes = bytes;
       /* More bytes than characters means the source is multibyte.  */
6849   coding->src_multibyte = chars < bytes;
6850
6851   attrs = CODING_ID_ATTRS (coding->id);
6852
6853   if (! NILP (CODING_ATTR_PRE_WRITE (attrs)))
6854     {
           /* Copy the source text into a work buffer so that the
              pre-write function can edit it freely.  */
6855       coding->src_object = code_conversion_save (1, coding->src_multibyte);
6856       set_buffer_internal (XBUFFER (coding->src_object));
6857       if (STRINGP (src_object))
6858         insert_from_string (src_object, from, from_byte, chars, bytes, 0);
6859       else if (BUFFERP (src_object))
6860         insert_from_buffer (XBUFFER (src_object), from, chars, 0);
6861       else
6862         insert_1_both (coding->source + from, chars, bytes, 0, 0, 0);
6863
           /* Only a buffer can be encoded in place.  Without the BUFFERP
              test, a nil source with a nil destination would also compare
              EQ and be handed to XBUFFER.  */
6864       if (BUFFERP (src_object) && EQ (src_object, dst_object))
6865         {
6866           set_buffer_internal (XBUFFER (src_object));
6867           saved_pt = PT, saved_pt_byte = PT_BYTE;
6868           del_range_both (from, from_byte, to, to_byte, 1);
6869           set_buffer_internal (XBUFFER (coding->src_object));
6870         }
6871
6872       call2 (CODING_ATTR_PRE_WRITE (attrs),
6873              make_number (BEG), make_number (Z));
           /* The function may have switched buffers; whatever buffer is
              current now holds the text to encode.  */
6874       coding->src_object = Fcurrent_buffer ();
6875       if (BEG != GPT)
6876         move_gap_both (BEG, BEG_BYTE);
6877       coding->src_chars = Z - BEG;
6878       coding->src_bytes = Z_BYTE - BEG_BYTE;
6879       coding->src_pos = BEG;
6880       coding->src_pos_byte = BEG_BYTE;
6881       coding->src_multibyte = Z < Z_BYTE;
6882     }
6883   else if (STRINGP (src_object))
6884     {
6885       code_conversion_save (0, 0);
6886       coding->src_pos = from;
6887       coding->src_pos_byte = from_byte;
6888     }
6889   else if (BUFFERP (src_object))
6890     {
6891       code_conversion_save (0, 0);
6892       set_buffer_internal (XBUFFER (src_object));
6893       if (EQ (src_object, dst_object))
6894         {
               /* Encoding in place: the deleted text, as returned by
                  del_range_1, becomes the source.  */
6895           saved_pt = PT, saved_pt_byte = PT_BYTE;
6896           coding->src_object = del_range_1 (from, to, 1, 1);
6897           coding->src_pos = 0;
6898           coding->src_pos_byte = 0;
6899         }
6900       else
6901         {
6902           if (from < GPT && to >= GPT)
6903             move_gap_both (from, from_byte);
6904           coding->src_pos = from;
6905           coding->src_pos_byte = from_byte;
6906         }
6907     }
6908   else
6909     code_conversion_save (0, 0);
6910
6911   if (BUFFERP (dst_object))
6912     {
6913       coding->dst_object = dst_object;
6914       if (EQ (src_object, dst_object))
6915         {
6916           coding->dst_pos = from;
6917           coding->dst_pos_byte = from_byte;
6918         }
6919       else
6920         {
6921           coding->dst_pos = BUF_PT (XBUFFER (dst_object));
6922           coding->dst_pos_byte = BUF_PT_BYTE (XBUFFER (dst_object));
6923         }
6924       coding->dst_multibyte
6925         = ! NILP (XBUFFER (dst_object)->enable_multibyte_characters);
6926     }
6927   else if (EQ (dst_object, Qt))
6928     {
           /* Encode into freshly allocated memory; start with one byte
              per source character, and never with a zero-sized block.  */
6929       coding->dst_object = Qnil;
6930       coding->dst_bytes = coding->src_chars;
6931       if (coding->dst_bytes == 0)
6932         coding->dst_bytes = 1;
6933       coding->destination = (unsigned char *) xmalloc (coding->dst_bytes);
6934       coding->dst_multibyte = 0;
6935     }
6936   else
6937     {
6938       coding->dst_object = Qnil;
6939       coding->dst_multibyte = 0;
6940     }
6941
6942   encode_coding (coding);
6943
6944   if (EQ (dst_object, Qt))
6945     {
6946       if (BUFFERP (coding->dst_object))
6947         coding->dst_object = Fbuffer_string ();
6948       else
6949         {
6950           coding->dst_object
6951             = make_unibyte_string ((char *) coding->destination,
6952                                    coding->produced);
6953           xfree (coding->destination);
               /* Do not leave a pointer to freed memory in CODING.  */
               coding->destination = NULL;
6954         }
6955     }
6956
6957   if (saved_pt >= 0)
6958     {
6959       /* This is the case of:
6960          (BUFFERP (src_object) && EQ (src_object, dst_object))
6961          As we have moved PT while replacing the original buffer
6962          contents, we must recover it now.  */
6963       set_buffer_internal (XBUFFER (src_object));
6964       if (saved_pt < from)
6965         TEMP_SET_PT_BOTH (saved_pt, saved_pt_byte);
6966       else if (saved_pt < from + chars)
6967         TEMP_SET_PT_BOTH (from, from_byte);
6968       else if (! NILP (current_buffer->enable_multibyte_characters))
6969         TEMP_SET_PT_BOTH (saved_pt + (coding->produced_char - chars),
6970                           saved_pt_byte + (coding->produced - bytes));
6971       else
6972         TEMP_SET_PT_BOTH (saved_pt + (coding->produced - bytes),
6973                           saved_pt_byte + (coding->produced - bytes));
6974     }
6975
6976   unbind_to (count, Qnil);
6977 }
6978
6979
     /* Return the name of the coding system registered for the coding
        category that comes first in coding_priorities.  */
6980 Lisp_Object
6981 preferred_coding_system ()
6982 {
6983   enum coding_category top_category = coding_priorities[0];
6984
6985   return CODING_ID_NAME (coding_categories[top_category].id);
6986 }
6987
6988 \f
6989 #ifdef emacs
6990 /*** 8. Emacs Lisp library functions ***/
6991
6992 DEFUN ("coding-system-p", Fcoding_system_p, Scoding_system_p, 1, 1, 0,
6993        doc: /* Return t if OBJECT is nil or a coding-system.
6994 See the documentation of `define-coding-system' for information
6995 about coding-system objects.  */)
6996      (obj)
6997      Lisp_Object obj;
6998 {
       /* Only a non-nil object that fails CODING_SYSTEM_P is rejected.  */
6999   return (! NILP (obj) && ! CODING_SYSTEM_P (obj)) ? Qnil : Qt;
7000 }
7001
7002 DEFUN ("read-non-nil-coding-system", Fread_non_nil_coding_system,
7003        Sread_non_nil_coding_system, 1, 1, 0,
7004        doc: /* Read a coding system from the minibuffer, prompting with string PROMPT.  */)
7005      (prompt)
7006      Lisp_Object prompt;
7007 {
7008   Lisp_Object input;
7009
       /* Keep asking until the user enters a non-empty name.  */
7010   for (;;)
7011     {
7012       input = Fcompleting_read (prompt, Vcoding_system_alist, Qnil,
7013                                 Qt, Qnil, Qcoding_system_history, Qnil, Qnil);
7014       if (SCHARS (input) != 0)
7015         return Fintern (input, Qnil);
7016     }
     }
7017
7018 DEFUN ("read-coding-system", Fread_coding_system, Sread_coding_system, 1, 2, 0,
7019        doc: /* Read a coding system from the minibuffer, prompting with string PROMPT.
7020 If the user enters null input, return second argument DEFAULT-CODING-SYSTEM.  */)
7021      (prompt, default_coding_system)
7022      Lisp_Object prompt, default_coding_system;
7023 {
7024   Lisp_Object input;
7025
       /* Fcompleting_read is given the default as a string, so a symbol
          is replaced by its name.  */
7026   if (SYMBOLP (default_coding_system))
7027     XSETSTRING (default_coding_system, XPNTR (SYMBOL_NAME (default_coding_system)));
7028   input = Fcompleting_read (prompt, Vcoding_system_alist, Qnil, Qt, Qnil,
7029                             Qcoding_system_history, default_coding_system, Qnil);
7030   return SCHARS (input) != 0 ? Fintern (input, Qnil) : Qnil;
7031 }
7032
7033 DEFUN ("check-coding-system", Fcheck_coding_system, Scheck_coding_system,
7034        1, 1, 0,
7035        doc: /* Check validity of CODING-SYSTEM.
7036 If valid, return CODING-SYSTEM, else signal a `coding-system-error' error.
7037 It is valid if it is nil or a symbol defined as a coding system by the
7038 function `define-coding-system'.  */)
7039   (coding_system)
7040      Lisp_Object coding_system;
7041 {
7042   CHECK_SYMBOL (coding_system);
       /* If Fsignal ever returns, signal again without re-checking.  */
7043   if (NILP (Fcoding_system_p (coding_system)))
7044     for (;;)
7045       Fsignal (Qcoding_system_error, Fcons (coding_system, Qnil));
7046   return coding_system;
7047 }
7048
7049 \f
7050 /* Detect how the bytes at SRC of length SRC_BYTES are encoded.  If
7051    HIGHEST is nonzero, return the coding system of the highest
7052    priority among the detected coding systems.  Otherwise return a
7053    list of detected coding systems sorted by their priorities.  If
7054    MULTIBYTEP is nonzero, it is assumed that the bytes are in correct
7055    multibyte form but contains only ASCII and eight-bit chars.
7056    Otherwise, the bytes are raw bytes.
7057
7058    CODING-SYSTEM controls the detection as below:
7059
7060    If it is nil, detect both text-format and eol-format.  If the
7061    text-format part of CODING-SYSTEM is already specified
7062    (e.g. `iso-latin-1'), detect only eol-format.  If the eol-format
7063    part of CODING-SYSTEM is already specified (e.g. `undecided-unix'),
7064    detect only text-format.  */
7065
7066 Lisp_Object
7067 detect_coding_system (src, src_chars, src_bytes, highest, multibytep,
7068                       coding_system)
7069      const unsigned char *src;
7070      int src_chars, src_bytes, highest;
7071      int multibytep;
7072      Lisp_Object coding_system;
7073 {
7074   const unsigned char *src_end = src + src_bytes;
7075   Lisp_Object attrs, eol_type;
7076   Lisp_Object val;
7077   struct coding_system coding;
7078   int id;
7079   struct coding_detection_info detect_info;
7080   enum coding_category base_category;
7081
7082   if (NILP (coding_system))
7083     coding_system = Qundecided;
7084   setup_coding_system (coding_system, &coding);
7085   attrs = CODING_ID_ATTRS (coding.id);
7086   eol_type = CODING_ID_EOL_TYPE (coding.id);
7087   coding_system = CODING_ATTR_BASE_NAME (attrs);
7088
7089   coding.source = src;
7090   coding.src_chars = src_chars;
7091   coding.src_bytes = src_bytes;
7092   coding.src_multibyte = multibytep;
7093   coding.consumed = 0;
7094   coding.mode |= CODING_MODE_LAST_BLOCK;
7095
7096   detect_info.checked = detect_info.found = detect_info.rejected = 0;
7097
7098   /* At first, detect text-format if necessary.  */
7099   base_category = XINT (CODING_ATTR_CATEGORY (attrs));
7100   if (base_category == coding_category_undecided)
7101     {
7102       enum coding_category category;
7103       struct coding_system *this;
7104       int c, i;
7105
7106       /* Skip all ASCII bytes except for a few ISO2022 controls.  */
7107       for (i = 0; src < src_end; i++, src++)
7108         {
7109           c = *src;
7110           if (c & 0x80 || (c < 0x20 && (c == ISO_CODE_ESC
7111                                         || c == ISO_CODE_SI
7112                                         || c == ISO_CODE_SO)))
7113             break;
7114         }
7115       coding.head_ascii = src - coding.source;
7116
7117       if (src < src_end)
7118         for (i = 0; i < coding_category_raw_text; i++)
7119           {
7120             category = coding_priorities[i];
7121             this = coding_categories + category;
7122
7123             if (this->id < 0)
7124               {
7125                 /* No coding system of this category is defined.  */
7126                 detect_info.rejected |= (1 << category);
7127               }
7128             else if (category >= coding_category_raw_text)
7129               continue;
7130             else if (detect_info.checked & (1 << category))
7131               {
7132                 if (highest
7133                     && (detect_info.found & (1 << category)))
7134                   break;
7135               }
7136             else
7137               {
7138                 if ((*(this->detector)) (&coding, &detect_info)
7139                     && highest
7140                     && (detect_info.found & (1 << category)))
7141                   {
7142                     if (category == coding_category_utf_16_auto)
7143                       {
7144                         if (detect_info.found & CATEGORY_MASK_UTF_16_LE)
7145                           category = coding_category_utf_16_le;
7146                         else
7147                           category = coding_category_utf_16_be;
7148                       }
7149                     break;
7150                   }
7151               }
7152           }
7153
7154       if (detect_info.rejected == CATEGORY_MASK_ANY)
7155         {
7156           detect_info.found = CATEGORY_MASK_RAW_TEXT;
7157           id = coding_categories[coding_category_raw_text].id;
7158           val = Fcons (make_number (id), Qnil);
7159         }
7160       else if (! detect_info.rejected && ! detect_info.found)
7161         {
7162           detect_info.found = CATEGORY_MASK_ANY;
7163           id = coding_categories[coding_category_undecided].id;
7164           val = Fcons (make_number (id), Qnil);
7165         }
7166       else if (highest)
7167         {
7168           if (detect_info.found)
7169             {
7170               detect_info.found = 1 << category;
7171               val = Fcons (make_number (this->id), Qnil);
7172             }
7173           else
7174             for (i = 0; i < coding_category_raw_text; i++)
7175               if (! (detect_info.rejected & (1 << coding_priorities[i])))
7176                 {
7177                   detect_info.found = 1 << coding_priorities[i];
7178                   id = coding_categories[coding_priorities[i]].id;
7179                   val = Fcons (make_number (id), Qnil);
7180                   break;
7181                 }
7182         }
7183       else
7184         {
7185           int mask = detect_info.rejected | detect_info.found;
7186           int found = 0;
7187           val = Qnil;
7188
7189           for (i = coding_category_raw_text - 1; i >= 0; i--)
7190             {
7191               category = coding_priorities[i];
7192               if (! (mask & (1 << category)))
7193                 {
7194                   found |= 1 << category;
7195                   id = coding_categories[category].id;
7196                   val = Fcons (make_number (id), val);
7197                 }
7198             }
7199           for (i = coding_category_raw_text - 1; i >= 0; i--)
7200             {
7201               category = coding_priorities[i];
7202               if (detect_info.found & (1 << category))
7203                 {
7204                   id = coding_categories[category].id;
7205                   val = Fcons (make_number (id), val);
7206                 }
7207             }
7208           detect_info.found |= found;
7209         }
7210     }
7211   else if (base_category == coding_category_utf_16_auto)
7212     {
7213       if (detect_coding_utf_16 (&coding, &detect_info))
7214         {
7215           enum coding_category category;
7216           struct coding_system *this;
7217
7218           if (detect_info.found & CATEGORY_MASK_UTF_16_LE)
7219             this = coding_categories + coding_category_utf_16_le;
7220           else if (detect_info.found & CATEGORY_MASK_UTF_16_BE)
7221             this = coding_categories + coding_category_utf_16_be;
7222           else if (detect_info.rejected & CATEGORY_MASK_UTF_16_LE_NOSIG)
7223             this = coding_categories + coding_category_utf_16_be_nosig;
7224           else
7225             this = coding_categories + coding_category_utf_16_le_nosig;
7226           val = Fcons (make_number (this->id), Qnil);
7227         }
7228     }
7229   else
7230     {
7231       detect_info.found = 1 << XINT (CODING_ATTR_CATEGORY (attrs));
7232       val = Fcons (make_number (coding.id), Qnil);
7233     }
7234
7235   /* Then, detect eol-format if necessary.  */
7236   {
7237     int normal_eol = -1, utf_16_be_eol = -1, utf_16_le_eol;
7238     Lisp_Object tail;
7239
7240     if (VECTORP (eol_type))
7241       {
7242         if (detect_info.found & ~CATEGORY_MASK_UTF_16)
7243           normal_eol = detect_eol (coding.source, src_bytes,
7244                                    coding_category_raw_text);
7245         if (detect_info.found & (CATEGORY_MASK_UTF_16_BE
7246                                  | CATEGORY_MASK_UTF_16_BE_NOSIG))
7247           utf_16_be_eol = detect_eol (coding.source, src_bytes,
7248                                       coding_category_utf_16_be);
7249         if (detect_info.found & (CATEGORY_MASK_UTF_16_LE
7250                                  | CATEGORY_MASK_UTF_16_LE_NOSIG))
7251           utf_16_le_eol = detect_eol (coding.source, src_bytes,
7252                                       coding_category_utf_16_le);
7253       }
7254     else
7255       {
7256         if (EQ (eol_type, Qunix))
7257           normal_eol = utf_16_be_eol = utf_16_le_eol = EOL_SEEN_LF;
7258         else if (EQ (eol_type, Qdos))
7259           normal_eol = utf_16_be_eol = utf_16_le_eol = EOL_SEEN_CRLF;
7260         else
7261           normal_eol = utf_16_be_eol = utf_16_le_eol = EOL_SEEN_CR;
7262       }
7263
7264     for (tail = val; CONSP (tail); tail = XCDR (tail))
7265       {
7266         enum coding_category category;
7267         int this_eol;
7268
7269         id = XINT (XCAR (tail));
7270         attrs = CODING_ID_ATTRS (id);
7271         category = XINT (CODING_ATTR_CATEGORY (attrs));
7272         eol_type = CODING_ID_EOL_TYPE (id);
7273         if (VECTORP (eol_type))
7274           {
7275             if (category == coding_category_utf_16_be
7276                 || category == coding_category_utf_16_be_nosig)
7277               this_eol = utf_16_be_eol;
7278             else if (category == coding_category_utf_16_le
7279                      || category == coding_category_utf_16_le_nosig)
7280               this_eol = utf_16_le_eol;
7281             else
7282               this_eol = normal_eol;
7283
7284             if (this_eol == EOL_SEEN_LF)
7285               XSETCAR (tail, AREF (eol_type, 0));
7286             else if (this_eol == EOL_SEEN_CRLF)
7287               XSETCAR (tail, AREF (eol_type, 1));
7288             else if (this_eol == EOL_SEEN_CR)
7289               XSETCAR (tail, AREF (eol_type, 2));
7290             else
7291               XSETCAR (tail, CODING_ID_NAME (id));
7292           }
7293         else
7294           XSETCAR (tail, CODING_ID_NAME (id));
7295       }
7296   }
7297
7298   return (highest ? XCAR (val) : val);
7299 }
7300
7301
7302 DEFUN ("detect-coding-region", Fdetect_coding_region, Sdetect_coding_region,
7303        2, 3, 0,
7304        doc: /* Detect coding system of the text in the region between START and END.
7305 Return a list of possible coding systems ordered by priority.
7306
7307 If only ASCII characters are found, it returns a list of single element
7308 `undecided' or its subsidiary coding system according to a detected
7309 end-of-line format.
7310
7311 If optional argument HIGHEST is non-nil, return the coding system of
7312 highest priority.  */)
7313      (start, end, highest)
7314      Lisp_Object start, end, highest;
7315 {
7316   int from, to;
7317   int from_byte, to_byte;
7318
7319   CHECK_NUMBER_COERCE_MARKER (start);
7320   CHECK_NUMBER_COERCE_MARKER (end);
7321
7322   validate_region (&start, &end);
7323   from = XINT (start), to = XINT (end);
7324   from_byte = CHAR_TO_BYTE (from);
7325   to_byte = CHAR_TO_BYTE (to);
7326
7327   if (from < GPT && to >= GPT)
7328     move_gap_both (to, to_byte);
7329
7330   return detect_coding_system (BYTE_POS_ADDR (from_byte),
7331                                to - from, to_byte - from_byte,
7332                                !NILP (highest),
7333                                !NILP (current_buffer
7334                                       ->enable_multibyte_characters),
7335                                Qnil);
7336 }
7337
7338 DEFUN ("detect-coding-string", Fdetect_coding_string, Sdetect_coding_string,
7339        1, 2, 0,
7340        doc: /* Detect coding system of the text in STRING.
7341 Return a list of possible coding systems ordered by priority.
7342
7343 If only ASCII characters are found, it returns a list of single element
7344 `undecided' or its subsidiary coding system according to a detected
7345 end-of-line format.
7346
7347 If optional argument HIGHEST is non-nil, return the coding system of
7348 highest priority.  */)
7349      (string, highest)
7350      Lisp_Object string, highest;
7351 {
7352   CHECK_STRING (string);
7353
7354   return detect_coding_system (SDATA (string),
7355                                SCHARS (string), SBYTES (string),
7356                                !NILP (highest), STRING_MULTIBYTE (string),
7357                                Qnil);
7358 }
7359
7360
7361 static INLINE int
7362 char_encodable_p (c, attrs)
7363      int c;
7364      Lisp_Object attrs;
7365 {
7366   Lisp_Object tail;
7367   struct charset *charset;
7368   Lisp_Object translation_table;
7369
7370   translation_table = CODING_ATTR_TRANS_TBL (attrs);
7371   if (! NILP (translation_table))
7372     c = translate_char (translation_table, c);
7373   for (tail = CODING_ATTR_CHARSET_LIST (attrs);
7374        CONSP (tail); tail = XCDR (tail))
7375     {
7376       charset = CHARSET_FROM_ID (XINT (XCAR (tail)));
7377       if (CHAR_CHARSET_P (c, charset))
7378         break;
7379     }
7380   return (! NILP (tail));
7381 }
7382
7383
7384 /* Return a list of coding systems that safely encode the text between
7385    START and END.  If EXCLUDE is non-nil, it is a list of coding
7386    systems not to check.  The returned list doesn't contain any such
7387    coding systems.  In any case, if the text contains only ASCII or is
7388    unibyte, return t.  */
7389
7390 DEFUN ("find-coding-systems-region-internal",
7391        Ffind_coding_systems_region_internal,
7392        Sfind_coding_systems_region_internal, 2, 3, 0,
7393        doc: /* Internal use only.  */)
7394      (start, end, exclude)
7395      Lisp_Object start, end, exclude;
7396 {
7397   Lisp_Object coding_attrs_list, safe_codings;
7398   EMACS_INT start_byte, end_byte;
7399   const unsigned char *p, *pbeg, *pend;
7400   int c;
7401   Lisp_Object tail, elt;
7402
7403   if (STRINGP (start))
7404     {
7405       if (!STRING_MULTIBYTE (start)
7406           || SCHARS (start) == SBYTES (start))
7407         return Qt;
7408       start_byte = 0;
7409       end_byte = SBYTES (start);
7410     }
7411   else
7412     {
7413       CHECK_NUMBER_COERCE_MARKER (start);
7414       CHECK_NUMBER_COERCE_MARKER (end);
7415       if (XINT (start) < BEG || XINT (end) > Z || XINT (start) > XINT (end))
7416         args_out_of_range (start, end);
7417       if (NILP (current_buffer->enable_multibyte_characters))
7418         return Qt;
7419       start_byte = CHAR_TO_BYTE (XINT (start));
7420       end_byte = CHAR_TO_BYTE (XINT (end));
7421       if (XINT (end) - XINT (start) == end_byte - start_byte)
7422         return Qt;
7423
7424       if (XINT (start) < GPT && XINT (end) > GPT)
7425         {
7426           if ((GPT - XINT (start)) < (XINT (end) - GPT))
7427             move_gap_both (XINT (start), start_byte);
7428           else
7429             move_gap_both (XINT (end), end_byte);
7430         }
7431     }
7432
7433   coding_attrs_list = Qnil;
7434   for (tail = Vcoding_system_list; CONSP (tail); tail = XCDR (tail))
7435     if (NILP (exclude)
7436         || NILP (Fmemq (XCAR (tail), exclude)))
7437       {
7438         Lisp_Object attrs;
7439
7440         attrs = AREF (CODING_SYSTEM_SPEC (XCAR (tail)), 0);
7441         if (EQ (XCAR (tail), CODING_ATTR_BASE_NAME (attrs))
7442             && ! EQ (CODING_ATTR_TYPE (attrs), Qundecided))
7443           {
7444             ASET (attrs, coding_attr_trans_tbl,
7445                   get_translation_table (attrs, 1, NULL));
7446             coding_attrs_list = Fcons (attrs, coding_attrs_list);
7447           }
7448       }
7449
7450   if (STRINGP (start))
7451     p = pbeg = SDATA (start);
7452   else
7453     p = pbeg = BYTE_POS_ADDR (start_byte);
7454   pend = p + (end_byte - start_byte);
7455
7456   while (p < pend && ASCII_BYTE_P (*p)) p++;
7457   while (p < pend && ASCII_BYTE_P (*(pend - 1))) pend--;
7458
7459   while (p < pend)
7460     {
7461       if (ASCII_BYTE_P (*p))
7462         p++;
7463       else
7464         {
7465           c = STRING_CHAR_ADVANCE (p);
7466
7467           charset_map_loaded = 0;
7468           for (tail = coding_attrs_list; CONSP (tail);)
7469             {
7470               elt = XCAR (tail);
7471               if (NILP (elt))
7472                 tail = XCDR (tail);
7473               else if (char_encodable_p (c, elt))
7474                 tail = XCDR (tail);
7475               else if (CONSP (XCDR (tail)))
7476                 {
7477                   XSETCAR (tail, XCAR (XCDR (tail)));
7478                   XSETCDR (tail, XCDR (XCDR (tail)));
7479                 }
7480               else
7481                 {
7482                   XSETCAR (tail, Qnil);
7483                   tail = XCDR (tail);
7484                 }
7485             }
7486           if (charset_map_loaded)
7487             {
7488               EMACS_INT p_offset = p - pbeg, pend_offset = pend - pbeg;
7489
7490               if (STRINGP (start))
7491                 pbeg = SDATA (start);
7492               else
7493                 pbeg = BYTE_POS_ADDR (start_byte);
7494               p = pbeg + p_offset;
7495               pend = pbeg + pend_offset;
7496             }
7497         }
7498     }
7499
7500   safe_codings = Qnil;
7501   for (tail = coding_attrs_list; CONSP (tail); tail = XCDR (tail))
7502     if (! NILP (XCAR (tail)))
7503       safe_codings = Fcons (CODING_ATTR_BASE_NAME (XCAR (tail)), safe_codings);
7504
7505   return safe_codings;
7506 }
7507
7508
7509 DEFUN ("unencodable-char-position", Funencodable_char_position,
7510        Sunencodable_char_position, 3, 5, 0,
7511        doc: /*
7512 Return position of first un-encodable character in a region.
7513 START and END specfiy the region and CODING-SYSTEM specifies the
7514 encoding to check.  Return nil if CODING-SYSTEM does encode the region.
7515
7516 If optional 4th argument COUNT is non-nil, it specifies at most how
7517 many un-encodable characters to search.  In this case, the value is a
7518 list of positions.
7519
7520 If optional 5th argument STRING is non-nil, it is a string to search
7521 for un-encodable characters.  In that case, START and END are indexes
7522 to the string.  */)
7523      (start, end, coding_system, count, string)
7524      Lisp_Object start, end, coding_system, count, string;
7525 {
7526   int n;
7527   struct coding_system coding;
7528   Lisp_Object attrs, charset_list, translation_table;
7529   Lisp_Object positions;
7530   int from, to;
7531   const unsigned char *p, *stop, *pend;
7532   int ascii_compatible;
7533
7534   setup_coding_system (Fcheck_coding_system (coding_system), &coding);
7535   attrs = CODING_ID_ATTRS (coding.id);
7536   if (EQ (CODING_ATTR_TYPE (attrs), Qraw_text))
7537     return Qnil;
7538   ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
7539   charset_list = CODING_ATTR_CHARSET_LIST (attrs);
7540   translation_table = get_translation_table (attrs, 1, NULL);
7541
7542   if (NILP (string))
7543     {
7544       validate_region (&start, &end);
7545       from = XINT (start);
7546       to = XINT (end);
7547       if (NILP (current_buffer->enable_multibyte_characters)
7548           || (ascii_compatible
7549               && (to - from) == (CHAR_TO_BYTE (to) - (CHAR_TO_BYTE (from)))))
7550         return Qnil;
7551       p = CHAR_POS_ADDR (from);
7552       pend = CHAR_POS_ADDR (to);
7553       if (from < GPT && to >= GPT)
7554         stop = GPT_ADDR;
7555       else
7556         stop = pend;
7557     }
7558   else
7559     {
7560       CHECK_STRING (string);
7561       CHECK_NATNUM (start);
7562       CHECK_NATNUM (end);
7563       from = XINT (start);
7564       to = XINT (end);
7565       if (from > to
7566           || to > SCHARS (string))
7567         args_out_of_range_3 (string, start, end);
7568       if (! STRING_MULTIBYTE (string))
7569         return Qnil;
7570       p = SDATA (string) + string_char_to_byte (string, from);
7571       stop = pend = SDATA (string) + string_char_to_byte (string, to);
7572       if (ascii_compatible && (to - from) == (pend - p))
7573         return Qnil;
7574     }
7575
7576   if (NILP (count))
7577     n = 1;
7578   else
7579     {
7580       CHECK_NATNUM (count);
7581       n = XINT (count);
7582     }
7583
7584   positions = Qnil;
7585   while (1)
7586     {
7587       int c;
7588
7589       if (ascii_compatible)
7590         while (p < stop && ASCII_BYTE_P (*p))
7591           p++, from++;
7592       if (p >= stop)
7593         {
7594           if (p >= pend)
7595             break;
7596           stop = pend;
7597           p = GAP_END_ADDR;
7598         }
7599
7600       c = STRING_CHAR_ADVANCE (p);
7601       if (! (ASCII_CHAR_P (c) && ascii_compatible)
7602           && ! char_charset (translate_char (translation_table, c),
7603                              charset_list, NULL))
7604         {
7605           positions = Fcons (make_number (from), positions);
7606           n--;
7607           if (n == 0)
7608             break;
7609         }
7610
7611       from++;
7612     }
7613
7614   return (NILP (count) ? Fcar (positions) : Fnreverse (positions));
7615 }
7616
7617
7618 DEFUN ("check-coding-systems-region", Fcheck_coding_systems_region,
7619        Scheck_coding_systems_region, 3, 3, 0,
7620        doc: /* Check if the region is encodable by coding systems.
7621
7622 START and END are buffer positions specifying the region.
7623 CODING-SYSTEM-LIST is a list of coding systems to check.
7624
7625 The value is an alist ((CODING-SYSTEM POS0 POS1 ...) ...), where
7626 CODING-SYSTEM is a member of CODING-SYSTEM-LIst and can't encode the
7627 whole region, POS0, POS1, ... are buffer positions where non-encodable
7628 characters are found.
7629
7630 If all coding systems in CODING-SYSTEM-LIST can encode the region, the
7631 value is nil.
7632
7633 START may be a string.  In that case, check if the string is
7634 encodable, and the value contains indices to the string instead of
7635 buffer positions.  END is ignored.  */)
7636      (start, end, coding_system_list)
7637      Lisp_Object start, end, coding_system_list;
7638 {
7639   Lisp_Object list;
7640   EMACS_INT start_byte, end_byte;
7641   int pos;
7642   const unsigned char *p, *pbeg, *pend;
7643   int c;
7644   Lisp_Object tail, elt, attrs;
7645
7646   if (STRINGP (start))
7647     {
7648       if (!STRING_MULTIBYTE (start)
7649           && SCHARS (start) != SBYTES (start))
7650         return Qnil;
7651       start_byte = 0;
7652       end_byte = SBYTES (start);
7653       pos = 0;
7654     }
7655   else
7656     {
7657       CHECK_NUMBER_COERCE_MARKER (start);
7658       CHECK_NUMBER_COERCE_MARKER (end);
7659       if (XINT (start) < BEG || XINT (end) > Z || XINT (start) > XINT (end))
7660         args_out_of_range (start, end);
7661       if (NILP (current_buffer->enable_multibyte_characters))
7662         return Qnil;
7663       start_byte = CHAR_TO_BYTE (XINT (start));
7664       end_byte = CHAR_TO_BYTE (XINT (end));
7665       if (XINT (end) - XINT (start) == end_byte - start_byte)
7666         return Qt;
7667
7668       if (XINT (start) < GPT && XINT (end) > GPT)
7669         {
7670           if ((GPT - XINT (start)) < (XINT (end) - GPT))
7671             move_gap_both (XINT (start), start_byte);
7672           else
7673             move_gap_both (XINT (end), end_byte);
7674         }
7675       pos = XINT (start);
7676     }
7677
7678   list = Qnil;
7679   for (tail = coding_system_list; CONSP (tail); tail = XCDR (tail))
7680     {
7681       elt = XCAR (tail);
7682       attrs = AREF (CODING_SYSTEM_SPEC (elt), 0);
7683       ASET (attrs, coding_attr_trans_tbl,
7684             get_translation_table (attrs, 1, NULL));
7685       list = Fcons (Fcons (elt, Fcons (attrs, Qnil)), list);
7686     }
7687
7688   if (STRINGP (start))
7689     p = pbeg = SDATA (start);
7690   else
7691     p = pbeg = BYTE_POS_ADDR (start_byte);
7692   pend = p + (end_byte - start_byte);
7693
7694   while (p < pend && ASCII_BYTE_P (*p)) p++, pos++;
7695   while (p < pend && ASCII_BYTE_P (*(pend - 1))) pend--;
7696
7697   while (p < pend)
7698     {
7699       if (ASCII_BYTE_P (*p))
7700         p++;
7701       else
7702         {
7703           c = STRING_CHAR_ADVANCE (p);
7704
7705           charset_map_loaded = 0;
7706           for (tail = list; CONSP (tail); tail = XCDR (tail))
7707             {
7708               elt = XCDR (XCAR (tail));
7709               if (! char_encodable_p (c, XCAR (elt)))
7710                 XSETCDR (elt, Fcons (make_number (pos), XCDR (elt)));
7711             }
7712           if (charset_map_loaded)
7713             {
7714               EMACS_INT p_offset = p - pbeg, pend_offset = pend - pbeg;
7715
7716               if (STRINGP (start))
7717                 pbeg = SDATA (start);
7718               else
7719                 pbeg = BYTE_POS_ADDR (start_byte);
7720               p = pbeg + p_offset;
7721               pend = pbeg + pend_offset;
7722             }
7723         }
7724       pos++;
7725     }
7726
7727   tail = list;
7728   list = Qnil;
7729   for (; CONSP (tail); tail = XCDR (tail))
7730     {
7731       elt = XCAR (tail);
7732       if (CONSP (XCDR (XCDR (elt))))
7733         list = Fcons (Fcons (XCAR (elt), Fnreverse (XCDR (XCDR (elt)))),
7734                       list);
7735     }
7736
7737   return list;
7738 }
7739
7740
7741 Lisp_Object
7742 code_convert_region (start, end, coding_system, dst_object, encodep, norecord)
7743      Lisp_Object start, end, coding_system, dst_object;
7744      int encodep, norecord;
7745 {
7746   struct coding_system coding;
7747   EMACS_INT from, from_byte, to, to_byte;
7748   Lisp_Object src_object;
7749
7750   CHECK_NUMBER_COERCE_MARKER (start);
7751   CHECK_NUMBER_COERCE_MARKER (end);
7752   if (NILP (coding_system))
7753     coding_system = Qno_conversion;
7754   else
7755     CHECK_CODING_SYSTEM (coding_system);
7756   src_object = Fcurrent_buffer ();
7757   if (NILP (dst_object))
7758     dst_object = src_object;
7759   else if (! EQ (dst_object, Qt))
7760     CHECK_BUFFER (dst_object);
7761
7762   validate_region (&start, &end);
7763   from = XFASTINT (start);
7764   from_byte = CHAR_TO_BYTE (from);
7765   to = XFASTINT (end);
7766   to_byte = CHAR_TO_BYTE (to);
7767
7768   setup_coding_system (coding_system, &coding);
7769   coding.mode |= CODING_MODE_LAST_BLOCK;
7770
7771   if (encodep)
7772     encode_coding_object (&coding, src_object, from, from_byte, to, to_byte,
7773                           dst_object);
7774   else
7775     decode_coding_object (&coding, src_object, from, from_byte, to, to_byte,
7776                           dst_object);
7777   if (! norecord)
7778     Vlast_coding_system_used = CODING_ID_NAME (coding.id);
7779
7780   return (BUFFERP (dst_object)
7781           ? make_number (coding.produced_char)
7782           : coding.dst_object);
7783 }
7784
7785
7786 DEFUN ("decode-coding-region", Fdecode_coding_region, Sdecode_coding_region,
7787        3, 4, "r\nzCoding system: ",
7788        doc: /* Decode the current region from the specified coding system.
7789 When called from a program, takes four arguments:
7790         START, END, CODING-SYSTEM, and DESTINATION.
7791 START and END are buffer positions.
7792
7793 Optional 4th arguments DESTINATION specifies where the decoded text goes.
7794 If nil, the region between START and END is replace by the decoded text.
7795 If buffer, the decoded text is inserted in the buffer.
7796 If t, the decoded text is returned.
7797
7798 This function sets `last-coding-system-used' to the precise coding system
7799 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
7800 not fully specified.)
7801 It returns the length of the decoded text.  */)
7802      (start, end, coding_system, destination)
7803      Lisp_Object start, end, coding_system, destination;
7804 {
7805   return code_convert_region (start, end, coding_system, destination, 0, 0);
7806 }
7807
7808 DEFUN ("encode-coding-region", Fencode_coding_region, Sencode_coding_region,
7809        3, 4, "r\nzCoding system: ",
7810        doc: /* Encode the current region by specified coding system.
7811 When called from a program, takes three arguments:
7812 START, END, and CODING-SYSTEM.  START and END are buffer positions.
7813
7814 Optional 4th arguments DESTINATION specifies where the encoded text goes.
7815 If nil, the region between START and END is replace by the encoded text.
7816 If buffer, the encoded text is inserted in the buffer.
7817 If t, the encoded text is returned.
7818
7819 This function sets `last-coding-system-used' to the precise coding system
7820 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
7821 not fully specified.)
7822 It returns the length of the encoded text.  */)
7823   (start, end, coding_system, destination)
7824      Lisp_Object start, end, coding_system, destination;
7825 {
7826   return code_convert_region (start, end, coding_system, destination, 1, 0);
7827 }
7828
7829 Lisp_Object
7830 code_convert_string (string, coding_system, dst_object,
7831                      encodep, nocopy, norecord)
7832      Lisp_Object string, coding_system, dst_object;
7833      int encodep, nocopy, norecord;
7834 {
7835   struct coding_system coding;
7836   EMACS_INT chars, bytes;
7837
7838   CHECK_STRING (string);
7839   if (NILP (coding_system))
7840     {
7841       if (! norecord)
7842         Vlast_coding_system_used = Qno_conversion;
7843       if (NILP (dst_object))
7844         return (nocopy ? Fcopy_sequence (string) : string);
7845     }
7846
7847   if (NILP (coding_system))
7848     coding_system = Qno_conversion;
7849   else
7850     CHECK_CODING_SYSTEM (coding_system);
7851   if (NILP (dst_object))
7852     dst_object = Qt;
7853   else if (! EQ (dst_object, Qt))
7854     CHECK_BUFFER (dst_object);
7855
7856   setup_coding_system (coding_system, &coding);
7857   coding.mode |= CODING_MODE_LAST_BLOCK;
7858   chars = SCHARS (string);
7859   bytes = SBYTES (string);
7860   if (encodep)
7861     encode_coding_object (&coding, string, 0, 0, chars, bytes, dst_object);
7862   else
7863     decode_coding_object (&coding, string, 0, 0, chars, bytes, dst_object);
7864   if (! norecord)
7865     Vlast_coding_system_used = CODING_ID_NAME (coding.id);
7866
7867   return (BUFFERP (dst_object)
7868           ? make_number (coding.produced_char)
7869           : coding.dst_object);
7870 }
7871
7872
7873 /* Encode or decode STRING according to CODING_SYSTEM.
7874    Do not set Vlast_coding_system_used.
7875
7876    This function is called only from macros DECODE_FILE and
7877    ENCODE_FILE, thus we ignore character composition.  */
7878
7879 Lisp_Object
7880 code_convert_string_norecord (string, coding_system, encodep)
7881      Lisp_Object string, coding_system;
7882      int encodep;
7883 {
7884   return code_convert_string (string, coding_system, Qt, encodep, 0, 1);
7885 }
7886
7887
7888 DEFUN ("decode-coding-string", Fdecode_coding_string, Sdecode_coding_string,
7889        2, 4, 0,
7890        doc: /* Decode STRING which is encoded in CODING-SYSTEM, and return the result.
7891
7892 Optional third arg NOCOPY non-nil means it is OK to return STRING itself
7893 if the decoding operation is trivial.
7894
7895 Optional fourth arg BUFFER non-nil meant that the decoded text is
7896 inserted in BUFFER instead of returned as a string.  In this case,
7897 the return value is BUFFER.
7898
7899 This function sets `last-coding-system-used' to the precise coding system
7900 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
7901 not fully specified.  */)
7902   (string, coding_system, nocopy, buffer)
7903      Lisp_Object string, coding_system, nocopy, buffer;
7904 {
7905   return code_convert_string (string, coding_system, buffer,
7906                               0, ! NILP (nocopy), 0);
7907 }
7908
7909 DEFUN ("encode-coding-string", Fencode_coding_string, Sencode_coding_string,
7910        2, 4, 0,
7911        doc: /* Encode STRING to CODING-SYSTEM, and return the result.
7912
7913 Optional third arg NOCOPY non-nil means it is OK to return STRING
7914 itself if the encoding operation is trivial.
7915
7916 Optional fourth arg BUFFER non-nil meant that the encoded text is
7917 inserted in BUFFER instead of returned as a string.  In this case,
7918 the return value is BUFFER.
7919
7920 This function sets `last-coding-system-used' to the precise coding system
7921 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
7922 not fully specified.)  */)
7923      (string, coding_system, nocopy, buffer)
7924      Lisp_Object string, coding_system, nocopy, buffer;
7925 {
7926   return code_convert_string (string, coding_system, buffer,
7927                               1, ! NILP (nocopy), 1);
7928 }
7929
7930 \f
7931 DEFUN ("decode-sjis-char", Fdecode_sjis_char, Sdecode_sjis_char, 1, 1, 0,
7932        doc: /* Decode a Japanese character which has CODE in shift_jis encoding.
7933 Return the corresponding character.  */)
7934      (code)
7935      Lisp_Object code;
7936 {
7937   Lisp_Object spec, attrs, val;
7938   struct charset *charset_roman, *charset_kanji, *charset_kana, *charset;
7939   int c;
7940
7941   CHECK_NATNUM (code);
7942   c = XFASTINT (code);
7943   CHECK_CODING_SYSTEM_GET_SPEC (Vsjis_coding_system, spec);
7944   attrs = AREF (spec, 0);
7945
7946   if (ASCII_BYTE_P (c)
7947       && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
7948     return code;
7949
7950   val = CODING_ATTR_CHARSET_LIST (attrs);
7951   charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
7952   charset_kana = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
7953   charset_kanji = CHARSET_FROM_ID (XINT (XCAR (val)));
7954
7955   if (c <= 0x7F)
7956     charset = charset_roman;
7957   else if (c >= 0xA0 && c < 0xDF)
7958     {
7959       charset = charset_kana;
7960       c -= 0x80;
7961     }
7962   else
7963     {
7964       int s1 = c >> 8, s2 = c & 0xFF;
7965
7966       if (s1 < 0x81 || (s1 > 0x9F && s1 < 0xE0) || s1 > 0xEF
7967           || s2 < 0x40 || s2 == 0x7F || s2 > 0xFC)
7968         error ("Invalid code: %d", code);
7969       SJIS_TO_JIS (c);
7970       charset = charset_kanji;
7971     }
7972   c = DECODE_CHAR (charset, c);
7973   if (c < 0)
7974     error ("Invalid code: %d", code);
7975   return make_number (c);
7976 }
7977
7978
7979 DEFUN ("encode-sjis-char", Fencode_sjis_char, Sencode_sjis_char, 1, 1, 0,
7980        doc: /* Encode a Japanese character CHAR to shift_jis encoding.
7981 Return the corresponding code in SJIS.  */)
7982      (ch)
7983     Lisp_Object ch;
7984 {
7985   Lisp_Object spec, attrs, charset_list;
7986   int c;
7987   struct charset *charset;
7988   unsigned code;
7989
7990   CHECK_CHARACTER (ch);
7991   c = XFASTINT (ch);
7992   CHECK_CODING_SYSTEM_GET_SPEC (Vsjis_coding_system, spec);
7993   attrs = AREF (spec, 0);
7994
7995   if (ASCII_CHAR_P (c)
7996       && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
7997     return ch;
7998
7999   charset_list = CODING_ATTR_CHARSET_LIST (attrs);
8000   charset = char_charset (c, charset_list, &code);
8001   if (code == CHARSET_INVALID_CODE (charset))
8002     error ("Can't encode by shift_jis encoding: %d", c);
8003   JIS_TO_SJIS (code);
8004
8005   return make_number (code);
8006 }
8007
8008 DEFUN ("decode-big5-char", Fdecode_big5_char, Sdecode_big5_char, 1, 1, 0,
8009        doc: /* Decode a Big5 character which has CODE in BIG5 coding system.
8010 Return the corresponding character.  */)
8011      (code)
8012      Lisp_Object code;
8013 {
8014   Lisp_Object spec, attrs, val;
8015   struct charset *charset_roman, *charset_big5, *charset;
8016   int c;
8017
8018   CHECK_NATNUM (code);
8019   c = XFASTINT (code);
8020   CHECK_CODING_SYSTEM_GET_SPEC (Vbig5_coding_system, spec);
8021   attrs = AREF (spec, 0);
8022
8023   if (ASCII_BYTE_P (c)
8024       && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
8025     return code;
8026
8027   val = CODING_ATTR_CHARSET_LIST (attrs);
8028   charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
8029   charset_big5 = CHARSET_FROM_ID (XINT (XCAR (val)));
8030
8031   if (c <= 0x7F)
8032     charset = charset_roman;
8033   else
8034     {
8035       int b1 = c >> 8, b2 = c & 0x7F;
8036       if (b1 < 0xA1 || b1 > 0xFE
8037           || b2 < 0x40 || (b2 > 0x7E && b2 < 0xA1) || b2 > 0xFE)
8038         error ("Invalid code: %d", code);
8039       charset = charset_big5;
8040     }
8041   c = DECODE_CHAR (charset, (unsigned )c);
8042   if (c < 0)
8043     error ("Invalid code: %d", code);
8044   return make_number (c);
8045 }
8046
8047 DEFUN ("encode-big5-char", Fencode_big5_char, Sencode_big5_char, 1, 1, 0,
8048        doc: /* Encode the Big5 character CHAR to BIG5 coding system.
8049 Return the corresponding character code in Big5.  */)
8050      (ch)
8051      Lisp_Object ch;
8052 {
8053   Lisp_Object spec, attrs, charset_list;
8054   struct charset *charset;
8055   int c;
8056   unsigned code;
8057
8058   CHECK_CHARACTER (ch);
8059   c = XFASTINT (ch);
8060   CHECK_CODING_SYSTEM_GET_SPEC (Vbig5_coding_system, spec);
8061   attrs = AREF (spec, 0);
8062   if (ASCII_CHAR_P (c)
8063       && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
8064     return ch;
8065
8066   charset_list = CODING_ATTR_CHARSET_LIST (attrs);
8067   charset = char_charset (c, charset_list, &code);
8068   if (code == CHARSET_INVALID_CODE (charset))
8069     error ("Can't encode by Big5 encoding: %d", c);
8070
8071   return make_number (code);
8072 }
8073
8074 \f
8075 DEFUN ("set-terminal-coding-system-internal",
8076        Fset_terminal_coding_system_internal,
8077        Sset_terminal_coding_system_internal, 1, 1, 0,
8078        doc: /* Internal use only.  */)
8079      (coding_system)
8080      Lisp_Object coding_system;
8081 {
8082   CHECK_SYMBOL (coding_system);
8083   setup_coding_system (Fcheck_coding_system (coding_system),
8084                         &terminal_coding);
8085
8086   /* We had better not send unsafe characters to terminal.  */
8087   terminal_coding.mode |= CODING_MODE_SAFE_ENCODING;
8088   /* Characer composition should be disabled.  */
8089   terminal_coding.common_flags &= ~CODING_ANNOTATE_COMPOSITION_MASK;
8090   terminal_coding.src_multibyte = 1;
8091   terminal_coding.dst_multibyte = 0;
8092   return Qnil;
8093 }
8094
8095 DEFUN ("set-safe-terminal-coding-system-internal",
8096        Fset_safe_terminal_coding_system_internal,
8097        Sset_safe_terminal_coding_system_internal, 1, 1, 0,
8098        doc: /* Internal use only.  */)
8099      (coding_system)
8100      Lisp_Object coding_system;
8101 {
8102   CHECK_SYMBOL (coding_system);
8103   setup_coding_system (Fcheck_coding_system (coding_system),
8104                        &safe_terminal_coding);
8105   /* Characer composition should be disabled.  */
8106   safe_terminal_coding.common_flags &= ~CODING_ANNOTATE_COMPOSITION_MASK;
8107   safe_terminal_coding.src_multibyte = 1;
8108   safe_terminal_coding.dst_multibyte = 0;
8109   return Qnil;
8110 }
8111
8112 DEFUN ("terminal-coding-system",
8113        Fterminal_coding_system, Sterminal_coding_system, 0, 0, 0,
8114        doc: /* Return coding system specified for terminal output.  */)
8115      ()
8116 {
8117   return CODING_ID_NAME (terminal_coding.id);
8118 }
8119
8120 DEFUN ("set-keyboard-coding-system-internal",
8121        Fset_keyboard_coding_system_internal,
8122        Sset_keyboard_coding_system_internal, 1, 1, 0,
8123        doc: /* Internal use only.  */)
8124      (coding_system)
8125      Lisp_Object coding_system;
8126 {
8127   CHECK_SYMBOL (coding_system);
8128   setup_coding_system (Fcheck_coding_system (coding_system),
8129                        &keyboard_coding);
8130   /* Characer composition should be disabled.  */
8131   keyboard_coding.common_flags &= ~CODING_ANNOTATE_COMPOSITION_MASK;
8132   return Qnil;
8133 }
8134
8135 DEFUN ("keyboard-coding-system",
8136        Fkeyboard_coding_system, Skeyboard_coding_system, 0, 0, 0,
8137        doc: /* Return coding system specified for decoding keyboard input.  */)
8138      ()
8139 {
8140   return CODING_ID_NAME (keyboard_coding.id);
8141 }
8142
8143 \f
8144 DEFUN ("find-operation-coding-system", Ffind_operation_coding_system,
8145        Sfind_operation_coding_system,  1, MANY, 0,
8146        doc: /* Choose a coding system for an operation based on the target name.
8147 The value names a pair of coding systems: (DECODING-SYSTEM . ENCODING-SYSTEM).
8148 DECODING-SYSTEM is the coding system to use for decoding
8149 \(in case OPERATION does decoding), and ENCODING-SYSTEM is the coding system
8150 for encoding (in case OPERATION does encoding).
8151
8152 The first argument OPERATION specifies an I/O primitive:
8153   For file I/O, `insert-file-contents' or `write-region'.
8154   For process I/O, `call-process', `call-process-region', or `start-process'.
8155   For network I/O, `open-network-stream'.
8156
8157 The remaining arguments should be the same arguments that were passed
8158 to the primitive.  Depending on which primitive, one of those arguments
8159 is selected as the TARGET.  For example, if OPERATION does file I/O,
8160 whichever argument specifies the file name is TARGET.
8161
8162 TARGET has a meaning which depends on OPERATION:
8163   For file I/O, TARGET is a file name.
8164   For process I/O, TARGET is a process name.
8165   For network I/O, TARGET is a service name or a port number
8166
8167 This function looks up what specified for TARGET in,
8168 `file-coding-system-alist', `process-coding-system-alist',
8169 or `network-coding-system-alist' depending on OPERATION.
8170 They may specify a coding system, a cons of coding systems,
8171 or a function symbol to call.
8172 In the last case, we call the function with one argument,
8173 which is a list of all the arguments given to this function.
8174
8175 usage: (find-operation-coding-system OPERATION ARGUMENTS ...)  */)
8176      (nargs, args)
8177      int nargs;
8178      Lisp_Object *args;
8179 {
8180   Lisp_Object operation, target_idx, target, val;
8181   register Lisp_Object chain;
8182
8183   if (nargs < 2)
8184     error ("Too few arguments");
8185   operation = args[0];
8186   if (!SYMBOLP (operation)
8187       || !INTEGERP (target_idx = Fget (operation, Qtarget_idx)))
8188     error ("Invalid first arguement");
8189   if (nargs < 1 + XINT (target_idx))
8190     error ("Too few arguments for operation: %s",
8191            SDATA (SYMBOL_NAME (operation)));
8192   target = args[XINT (target_idx) + 1];
8193   if (!(STRINGP (target)
8194         || (EQ (operation, Qopen_network_stream) && INTEGERP (target))))
8195     error ("Invalid %dth argument", XINT (target_idx) + 1);
8196
8197   chain = ((EQ (operation, Qinsert_file_contents)
8198             || EQ (operation, Qwrite_region))
8199            ? Vfile_coding_system_alist
8200            : (EQ (operation, Qopen_network_stream)
8201               ? Vnetwork_coding_system_alist
8202               : Vprocess_coding_system_alist));
8203   if (NILP (chain))
8204     return Qnil;
8205
8206   for (; CONSP (chain); chain = XCDR (chain))
8207     {
8208       Lisp_Object elt;
8209
8210       elt = XCAR (chain);
8211       if (CONSP (elt)
8212           && ((STRINGP (target)
8213                && STRINGP (XCAR (elt))
8214                && fast_string_match (XCAR (elt), target) >= 0)
8215               || (INTEGERP (target) && EQ (target, XCAR (elt)))))
8216         {
8217           val = XCDR (elt);
8218           /* Here, if VAL is both a valid coding system and a valid
8219              function symbol, we return VAL as a coding system.  */
8220           if (CONSP (val))
8221             return val;
8222           if (! SYMBOLP (val))
8223             return Qnil;
8224           if (! NILP (Fcoding_system_p (val)))
8225             return Fcons (val, val);
8226           if (! NILP (Ffboundp (val)))
8227             {
8228               val = call1 (val, Flist (nargs, args));
8229               if (CONSP (val))
8230                 return val;
8231               if (SYMBOLP (val) && ! NILP (Fcoding_system_p (val)))
8232                 return Fcons (val, val);
8233             }
8234           return Qnil;
8235         }
8236     }
8237   return Qnil;
8238 }
8239
8240 DEFUN ("set-coding-system-priority", Fset_coding_system_priority,
8241        Sset_coding_system_priority, 0, MANY, 0,
8242        doc: /* Assign higher priority to the coding systems given as arguments.
8243 If multiple coding systems belongs to the same category,
8244 all but the first one are ignored.
8245
8246 usage: (set-coding-system-priority ...)  */)
8247      (nargs, args)
8248      int nargs;
8249      Lisp_Object *args;
8250 {
8251   int i, j;
8252   int changed[coding_category_max];
8253   enum coding_category priorities[coding_category_max];
8254
8255   bzero (changed, sizeof changed);
8256
8257   for (i = j = 0; i < nargs; i++)
8258     {
8259       enum coding_category category;
8260       Lisp_Object spec, attrs;
8261
8262       CHECK_CODING_SYSTEM_GET_SPEC (args[i], spec);
8263       attrs = AREF (spec, 0);
8264       category = XINT (CODING_ATTR_CATEGORY (attrs));
8265       if (changed[category])
8266         /* Ignore this coding system because a coding system of the
8267            same category already had a higher priority.  */
8268         continue;
8269       changed[category] = 1;
8270       priorities[j++] = category;
8271       if (coding_categories[category].id >= 0
8272           && ! EQ (args[i], CODING_ID_NAME (coding_categories[category].id)))
8273         setup_coding_system (args[i], &coding_categories[category]);
8274       Fset (AREF (Vcoding_category_table, category), args[i]);
8275     }
8276
8277   /* Now we have decided top J priorities.  Reflect the order of the
8278      original priorities to the remaining priorities.  */
8279
8280   for (i = j, j = 0; i < coding_category_max; i++, j++)
8281     {
8282       while (j < coding_category_max
8283              && changed[coding_priorities[j]])
8284         j++;
8285       if (j == coding_category_max)
8286         abort ();
8287       priorities[i] = coding_priorities[j];
8288     }
8289
8290   bcopy (priorities, coding_priorities, sizeof priorities);
8291
8292   /* Update `coding-category-list'.  */
8293   Vcoding_category_list = Qnil;
8294   for (i = coding_category_max - 1; i >= 0; i--)
8295     Vcoding_category_list
8296       = Fcons (AREF (Vcoding_category_table, priorities[i]),
8297                Vcoding_category_list);
8298
8299   return Qnil;
8300 }
8301
8302 DEFUN ("coding-system-priority-list", Fcoding_system_priority_list,
8303        Scoding_system_priority_list, 0, 1, 0,
8304        doc: /* Return a list of coding systems ordered by their priorities.
8305 HIGHESTP non-nil means just return the highest priority one.  */)
8306      (highestp)
8307      Lisp_Object highestp;
8308 {
8309   int i;
8310   Lisp_Object val;
8311
8312   for (i = 0, val = Qnil; i < coding_category_max; i++)
8313     {
8314       enum coding_category category = coding_priorities[i];
8315       int id = coding_categories[category].id;
8316       Lisp_Object attrs;
8317
8318       if (id < 0)
8319         continue;
8320       attrs = CODING_ID_ATTRS (id);
8321       if (! NILP (highestp))
8322         return CODING_ATTR_BASE_NAME (attrs);
8323       val = Fcons (CODING_ATTR_BASE_NAME (attrs), val);
8324     }
8325   return Fnreverse (val);
8326 }
8327
8328 static char *suffixes[] = { "-unix", "-dos", "-mac" };
8329
8330 static Lisp_Object
8331 make_subsidiaries (base)
8332      Lisp_Object base;
8333 {
8334   Lisp_Object subsidiaries;
8335   int base_name_len = SBYTES (SYMBOL_NAME (base));
8336   char *buf = (char *) alloca (base_name_len + 6);
8337   int i;
8338
8339   bcopy (SDATA (SYMBOL_NAME (base)), buf, base_name_len);
8340   subsidiaries = Fmake_vector (make_number (3), Qnil);
8341   for (i = 0; i < 3; i++)
8342     {
8343       bcopy (suffixes[i], buf + base_name_len, strlen (suffixes[i]) + 1);
8344       ASET (subsidiaries, i, intern (buf));
8345     }
8346   return subsidiaries;
8347 }
8348
8349
8350 DEFUN ("define-coding-system-internal", Fdefine_coding_system_internal,
8351        Sdefine_coding_system_internal, coding_arg_max, MANY, 0,
8352        doc: /* For internal use only.
8353 usage: (define-coding-system-internal ...)  */)
8354      (nargs, args)
8355      int nargs;
8356      Lisp_Object *args;
8357 {
8358   Lisp_Object name;
8359   Lisp_Object spec_vec;         /* [ ATTRS ALIASE EOL_TYPE ] */
8360   Lisp_Object attrs;            /* Vector of attributes.  */
8361   Lisp_Object eol_type;
8362   Lisp_Object aliases;
8363   Lisp_Object coding_type, charset_list, safe_charsets;
8364   enum coding_category category;
8365   Lisp_Object tail, val;
8366   int max_charset_id = 0;
8367   int i;
8368
8369   if (nargs < coding_arg_max)
8370     goto short_args;
8371
8372   attrs = Fmake_vector (make_number (coding_attr_last_index), Qnil);
8373
8374   name = args[coding_arg_name];
8375   CHECK_SYMBOL (name);
8376   CODING_ATTR_BASE_NAME (attrs) = name;
8377
8378   val = args[coding_arg_mnemonic];
8379   if (! STRINGP (val))
8380     CHECK_CHARACTER (val);
8381   CODING_ATTR_MNEMONIC (attrs) = val;
8382
8383   coding_type = args[coding_arg_coding_type];
8384   CHECK_SYMBOL (coding_type);
8385   CODING_ATTR_TYPE (attrs) = coding_type;
8386
8387   charset_list = args[coding_arg_charset_list];
8388   if (SYMBOLP (charset_list))
8389     {
8390       if (EQ (charset_list, Qiso_2022))
8391         {
8392           if (! EQ (coding_type, Qiso_2022))
8393             error ("Invalid charset-list");
8394           charset_list = Viso_2022_charset_list;
8395         }
8396       else if (EQ (charset_list, Qemacs_mule))
8397         {
8398           if (! EQ (coding_type, Qemacs_mule))
8399             error ("Invalid charset-list");
8400           charset_list = Vemacs_mule_charset_list;
8401         }
8402       for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
8403         if (max_charset_id < XFASTINT (XCAR (tail)))
8404           max_charset_id = XFASTINT (XCAR (tail));
8405     }
8406   else
8407     {
8408       charset_list = Fcopy_sequence (charset_list);
8409       for (tail = charset_list; !NILP (tail); tail = Fcdr (tail))
8410         {
8411           struct charset *charset;
8412
8413           val = Fcar (tail);
8414           CHECK_CHARSET_GET_CHARSET (val, charset);
8415           if (EQ (coding_type, Qiso_2022)
8416               ? CHARSET_ISO_FINAL (charset) < 0
8417               : EQ (coding_type, Qemacs_mule)
8418               ? CHARSET_EMACS_MULE_ID (charset) < 0
8419               : 0)
8420             error ("Can't handle charset `%s'",
8421                    SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
8422
8423           XSETCAR (tail, make_number (charset->id));
8424           if (max_charset_id < charset->id)
8425             max_charset_id = charset->id;
8426         }
8427     }
8428   CODING_ATTR_CHARSET_LIST (attrs) = charset_list;
8429
8430   safe_charsets = Fmake_string (make_number (max_charset_id + 1),
8431                                 make_number (255));
8432   for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
8433     SSET (safe_charsets, XFASTINT (XCAR (tail)), 0);
8434   CODING_ATTR_SAFE_CHARSETS (attrs) = safe_charsets;
8435
8436   CODING_ATTR_ASCII_COMPAT (attrs) = args[coding_arg_ascii_compatible_p];
8437
8438   val = args[coding_arg_decode_translation_table];
8439   if (! CHAR_TABLE_P (val) && ! CONSP (val))
8440     CHECK_SYMBOL (val);
8441   CODING_ATTR_DECODE_TBL (attrs) = val;
8442
8443   val = args[coding_arg_encode_translation_table];
8444   if (! CHAR_TABLE_P (val) && ! CONSP (val))
8445     CHECK_SYMBOL (val);
8446   CODING_ATTR_ENCODE_TBL (attrs) = val;
8447
8448   val = args[coding_arg_post_read_conversion];
8449   CHECK_SYMBOL (val);
8450   CODING_ATTR_POST_READ (attrs) = val;
8451
8452   val = args[coding_arg_pre_write_conversion];
8453   CHECK_SYMBOL (val);
8454   CODING_ATTR_PRE_WRITE (attrs) = val;
8455
8456   val = args[coding_arg_default_char];
8457   if (NILP (val))
8458     CODING_ATTR_DEFAULT_CHAR (attrs) = make_number (' ');
8459   else
8460     {
8461       CHECK_CHARACTER (val);
8462       CODING_ATTR_DEFAULT_CHAR (attrs) = val;
8463     }
8464
8465   val = args[coding_arg_for_unibyte];
8466   CODING_ATTR_FOR_UNIBYTE (attrs) = NILP (val) ? Qnil : Qt;
8467
8468   val = args[coding_arg_plist];
8469   CHECK_LIST (val);
8470   CODING_ATTR_PLIST (attrs) = val;
8471
8472   if (EQ (coding_type, Qcharset))
8473     {
8474       /* Generate a lisp vector of 256 elements.  Each element is nil,
8475          integer, or a list of charset IDs.
8476
8477          If Nth element is nil, the byte code N is invalid in this
8478          coding system.
8479
8480          If Nth element is a number NUM, N is the first byte of a
8481          charset whose ID is NUM.
8482
8483          If Nth element is a list of charset IDs, N is the first byte
8484          of one of them.  The list is sorted by dimensions of the
8485          charsets.  A charset of smaller dimension comes firtst. */
8486       val = Fmake_vector (make_number (256), Qnil);
8487
8488       for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
8489         {
8490           struct charset *charset = CHARSET_FROM_ID (XFASTINT (XCAR (tail)));
8491           int dim = CHARSET_DIMENSION (charset);
8492           int idx = (dim - 1) * 4;
8493
8494           if (CHARSET_ASCII_COMPATIBLE_P (charset))
8495             CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
8496
8497           for (i = charset->code_space[idx];
8498                i <= charset->code_space[idx + 1]; i++)
8499             {
8500               Lisp_Object tmp, tmp2;
8501               int dim2;
8502
8503               tmp = AREF (val, i);
8504               if (NILP (tmp))
8505                 tmp = XCAR (tail);
8506               else if (NUMBERP (tmp))
8507                 {
8508                   dim2 = CHARSET_DIMENSION (CHARSET_FROM_ID (XFASTINT (tmp)));
8509                   if (dim < dim2)
8510                     tmp = Fcons (XCAR (tail), Fcons (tmp, Qnil));
8511                   else
8512                     tmp = Fcons (tmp, Fcons (XCAR (tail), Qnil));
8513                 }
8514               else
8515                 {
8516                   for (tmp2 = tmp; CONSP (tmp2); tmp2 = XCDR (tmp2))
8517                     {
8518                       dim2 = CHARSET_DIMENSION (CHARSET_FROM_ID (XFASTINT (XCAR (tmp2))));
8519                       if (dim < dim2)
8520                         break;
8521                     }
8522                   if (NILP (tmp2))
8523                     tmp = nconc2 (tmp, Fcons (XCAR (tail), Qnil));
8524                   else
8525                     {
8526                       XSETCDR (tmp2, Fcons (XCAR (tmp2), XCDR (tmp2)));
8527                       XSETCAR (tmp2, XCAR (tail));
8528                     }
8529                 }
8530               ASET (val, i, tmp);
8531             }
8532         }
8533       ASET (attrs, coding_attr_charset_valids, val);
8534       category = coding_category_charset;
8535     }
8536   else if (EQ (coding_type, Qccl))
8537     {
8538       Lisp_Object valids;
8539
8540       if (nargs < coding_arg_ccl_max)
8541         goto short_args;
8542
8543       val = args[coding_arg_ccl_decoder];
8544       CHECK_CCL_PROGRAM (val);
8545       if (VECTORP (val))
8546         val = Fcopy_sequence (val);
8547       ASET (attrs, coding_attr_ccl_decoder, val);
8548
8549       val = args[coding_arg_ccl_encoder];
8550       CHECK_CCL_PROGRAM (val);
8551       if (VECTORP (val))
8552         val = Fcopy_sequence (val);
8553       ASET (attrs, coding_attr_ccl_encoder, val);
8554
8555       val = args[coding_arg_ccl_valids];
8556       valids = Fmake_string (make_number (256), make_number (0));
8557       for (tail = val; !NILP (tail); tail = Fcdr (tail))
8558         {
8559           int from, to;
8560
8561           val = Fcar (tail);
8562           if (INTEGERP (val))
8563             {
8564               from = to = XINT (val);
8565               if (from < 0 || from > 255)
8566                 args_out_of_range_3 (val, make_number (0), make_number (255));
8567             }
8568           else
8569             {
8570               CHECK_CONS (val);
8571               CHECK_NATNUM_CAR (val);
8572               CHECK_NATNUM_CDR (val);
8573               from = XINT (XCAR (val));
8574               if (from > 255)
8575                 args_out_of_range_3 (XCAR (val),
8576                                      make_number (0), make_number (255));
8577               to = XINT (XCDR (val));
8578               if (to < from || to > 255)
8579                 args_out_of_range_3 (XCDR (val),
8580                                      XCAR (val), make_number (255));
8581             }
8582           for (i = from; i <= to; i++)
8583             SSET (valids, i, 1);
8584         }
8585       ASET (attrs, coding_attr_ccl_valids, valids);
8586
8587       category = coding_category_ccl;
8588     }
8589   else if (EQ (coding_type, Qutf_16))
8590     {
8591       Lisp_Object bom, endian;
8592
8593       CODING_ATTR_ASCII_COMPAT (attrs) = Qnil;
8594
8595       if (nargs < coding_arg_utf16_max)
8596         goto short_args;
8597
8598       bom = args[coding_arg_utf16_bom];
8599       if (! NILP (bom) && ! EQ (bom, Qt))
8600         {
8601           CHECK_CONS (bom);
8602           val = XCAR (bom);
8603           CHECK_CODING_SYSTEM (val);
8604           val = XCDR (bom);
8605           CHECK_CODING_SYSTEM (val);
8606         }
8607       ASET (attrs, coding_attr_utf_16_bom, bom);
8608
8609       endian = args[coding_arg_utf16_endian];
8610       CHECK_SYMBOL (endian);
8611       if (NILP (endian))
8612         endian = Qbig;
8613       else if (! EQ (endian, Qbig) && ! EQ (endian, Qlittle))
8614         error ("Invalid endian: %s", SDATA (SYMBOL_NAME (endian)));
8615       ASET (attrs, coding_attr_utf_16_endian, endian);
8616
8617       category = (CONSP (bom)
8618                   ? coding_category_utf_16_auto
8619                   : NILP (bom)
8620                   ? (EQ (endian, Qbig)
8621                      ? coding_category_utf_16_be_nosig
8622                      : coding_category_utf_16_le_nosig)
8623                   : (EQ (endian, Qbig)
8624                      ? coding_category_utf_16_be
8625                      : coding_category_utf_16_le));
8626     }
8627   else if (EQ (coding_type, Qiso_2022))
8628     {
8629       Lisp_Object initial, reg_usage, request, flags;
8630       int i;
8631
8632       if (nargs < coding_arg_iso2022_max)
8633         goto short_args;
8634
8635       initial = Fcopy_sequence (args[coding_arg_iso2022_initial]);
8636       CHECK_VECTOR (initial);
8637       for (i = 0; i < 4; i++)
8638         {
8639           val = Faref (initial, make_number (i));
8640           if (! NILP (val))
8641             {
8642               struct charset *charset;
8643
8644               CHECK_CHARSET_GET_CHARSET (val, charset);
8645               ASET (initial, i, make_number (CHARSET_ID (charset)));
8646               if (i == 0 && CHARSET_ASCII_COMPATIBLE_P (charset))
8647                 CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
8648             }
8649           else
8650             ASET (initial, i, make_number (-1));
8651         }
8652
8653       reg_usage = args[coding_arg_iso2022_reg_usage];
8654       CHECK_CONS (reg_usage);
8655       CHECK_NUMBER_CAR (reg_usage);
8656       CHECK_NUMBER_CDR (reg_usage);
8657
8658       request = Fcopy_sequence (args[coding_arg_iso2022_request]);
8659       for (tail = request; ! NILP (tail); tail = Fcdr (tail))
8660         {
8661           int id;
8662           Lisp_Object tmp;
8663
8664           val = Fcar (tail);
8665           CHECK_CONS (val);
8666           tmp = XCAR (val);
8667           CHECK_CHARSET_GET_ID (tmp, id);
8668           CHECK_NATNUM_CDR (val);
8669           if (XINT (XCDR (val)) >= 4)
8670             error ("Invalid graphic register number: %d", XINT (XCDR (val)));
8671           XSETCAR (val, make_number (id));
8672         }
8673
8674       flags = args[coding_arg_iso2022_flags];
8675       CHECK_NATNUM (flags);
8676       i = XINT (flags);
8677       if (EQ (args[coding_arg_charset_list], Qiso_2022))
8678         flags = make_number (i | CODING_ISO_FLAG_FULL_SUPPORT);
8679
8680       ASET (attrs, coding_attr_iso_initial, initial);
8681       ASET (attrs, coding_attr_iso_usage, reg_usage);
8682       ASET (attrs, coding_attr_iso_request, request);
8683       ASET (attrs, coding_attr_iso_flags, flags);
8684       setup_iso_safe_charsets (attrs);
8685
8686       if (i & CODING_ISO_FLAG_SEVEN_BITS)
8687         category = ((i & (CODING_ISO_FLAG_LOCKING_SHIFT
8688                           | CODING_ISO_FLAG_SINGLE_SHIFT))
8689                     ? coding_category_iso_7_else
8690                     : EQ (args[coding_arg_charset_list], Qiso_2022)
8691                     ? coding_category_iso_7
8692                     : coding_category_iso_7_tight);
8693       else
8694         {
8695           int id = XINT (AREF (initial, 1));
8696
8697           category = (((i & CODING_ISO_FLAG_LOCKING_SHIFT)
8698                        || EQ (args[coding_arg_charset_list], Qiso_2022)
8699                        || id < 0)
8700                       ? coding_category_iso_8_else
8701                       : (CHARSET_DIMENSION (CHARSET_FROM_ID (id)) == 1)
8702                       ? coding_category_iso_8_1
8703                       : coding_category_iso_8_2);
8704         }
8705       if (category != coding_category_iso_8_1
8706           && category != coding_category_iso_8_2)
8707         CODING_ATTR_ASCII_COMPAT (attrs) = Qnil;
8708     }
8709   else if (EQ (coding_type, Qemacs_mule))
8710     {
8711       if (EQ (args[coding_arg_charset_list], Qemacs_mule))
8712         ASET (attrs, coding_attr_emacs_mule_full, Qt);
8713       CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
8714       category = coding_category_emacs_mule;
8715     }
8716   else if (EQ (coding_type, Qshift_jis))
8717     {
8718
8719       struct charset *charset;
8720
8721       if (XINT (Flength (charset_list)) != 3
8722           && XINT (Flength (charset_list)) != 4)
8723         error ("There should be three or four charsets");
8724
8725       charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
8726       if (CHARSET_DIMENSION (charset) != 1)
8727         error ("Dimension of charset %s is not one",
8728                SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
8729       if (CHARSET_ASCII_COMPATIBLE_P (charset))
8730         CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
8731
8732       charset_list = XCDR (charset_list);
8733       charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
8734       if (CHARSET_DIMENSION (charset) != 1)
8735         error ("Dimension of charset %s is not one",
8736                SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
8737
8738       charset_list = XCDR (charset_list);
8739       charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
8740       if (CHARSET_DIMENSION (charset) != 2)
8741         error ("Dimension of charset %s is not two",
8742                SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
8743
8744       charset_list = XCDR (charset_list);
8745       if (! NILP (charset_list))
8746         {
8747           charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
8748           if (CHARSET_DIMENSION (charset) != 2)
8749             error ("Dimension of charset %s is not two",
8750                    SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
8751         }
8752
8753       category = coding_category_sjis;
8754       Vsjis_coding_system = name;
8755     }
8756   else if (EQ (coding_type, Qbig5))
8757     {
8758       struct charset *charset;
8759
8760       if (XINT (Flength (charset_list)) != 2)
8761         error ("There should be just two charsets");
8762
8763       charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
8764       if (CHARSET_DIMENSION (charset) != 1)
8765         error ("Dimension of charset %s is not one",
8766                SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
8767       if (CHARSET_ASCII_COMPATIBLE_P (charset))
8768         CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
8769
8770       charset_list = XCDR (charset_list);
8771       charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
8772       if (CHARSET_DIMENSION (charset) != 2)
8773         error ("Dimension of charset %s is not two",
8774                SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
8775
8776       category = coding_category_big5;
8777       Vbig5_coding_system = name;
8778     }
8779   else if (EQ (coding_type, Qraw_text))
8780     {
8781       category = coding_category_raw_text;
8782       CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
8783     }
8784   else if (EQ (coding_type, Qutf_8))
8785     {
8786       category = coding_category_utf_8;
8787       CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
8788     }
8789   else if (EQ (coding_type, Qundecided))
8790     category = coding_category_undecided;
8791   else
8792     error ("Invalid coding system type: %s",
8793            SDATA (SYMBOL_NAME (coding_type)));
8794
8795   CODING_ATTR_CATEGORY (attrs) = make_number (category);
8796   CODING_ATTR_PLIST (attrs)
8797     = Fcons (QCcategory, Fcons (AREF (Vcoding_category_table, category),
8798                                 CODING_ATTR_PLIST (attrs)));
8799
8800   eol_type = args[coding_arg_eol_type];
8801   if (! NILP (eol_type)
8802       && ! EQ (eol_type, Qunix)
8803       && ! EQ (eol_type, Qdos)
8804       && ! EQ (eol_type, Qmac))
8805     error ("Invalid eol-type");
8806
8807   aliases = Fcons (name, Qnil);
8808
8809   if (NILP (eol_type))
8810     {
8811       eol_type = make_subsidiaries (name);
8812       for (i = 0; i < 3; i++)
8813         {
8814           Lisp_Object this_spec, this_name, this_aliases, this_eol_type;
8815
8816           this_name = AREF (eol_type, i);
8817           this_aliases = Fcons (this_name, Qnil);
8818           this_eol_type = (i == 0 ? Qunix : i == 1 ? Qdos : Qmac);
8819           this_spec = Fmake_vector (make_number (3), attrs);
8820           ASET (this_spec, 1, this_aliases);
8821           ASET (this_spec, 2, this_eol_type);
8822           Fputhash (this_name, this_spec, Vcoding_system_hash_table);
8823           Vcoding_system_list = Fcons (this_name, Vcoding_system_list);
8824           Vcoding_system_alist = Fcons (Fcons (Fsymbol_name (this_name), Qnil),
8825                                         Vcoding_system_alist);
8826         }
8827     }
8828
8829   spec_vec = Fmake_vector (make_number (3), attrs);
8830   ASET (spec_vec, 1, aliases);
8831   ASET (spec_vec, 2, eol_type);
8832
8833   Fputhash (name, spec_vec, Vcoding_system_hash_table);
8834   Vcoding_system_list = Fcons (name, Vcoding_system_list);
8835   Vcoding_system_alist = Fcons (Fcons (Fsymbol_name (name), Qnil),
8836                                 Vcoding_system_alist);
8837
8838   {
8839     int id = coding_categories[category].id;
8840
8841     if (id < 0 || EQ (name, CODING_ID_NAME (id)))
8842       setup_coding_system (name, &coding_categories[category]);
8843   }
8844
8845   return Qnil;
8846
8847  short_args:
8848   return Fsignal (Qwrong_number_of_arguments,
8849                   Fcons (intern ("define-coding-system-internal"),
8850                          make_number (nargs)));
8851 }
8852
8853
8854 DEFUN ("coding-system-put", Fcoding_system_put, Scoding_system_put,
8855        3, 3, 0,
8856        doc: /* Change value in CODING-SYSTEM's property list PROP to VAL.  */)
8857   (coding_system, prop, val)
8858      Lisp_Object coding_system, prop, val;
8859 {
8860   Lisp_Object spec, attrs, plist;
8861
8862   CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
8863   attrs = AREF (spec, 0);
8864   if (EQ (prop, QCmnemonic))
8865     {
8866       if (! STRINGP (val))
8867         CHECK_CHARACTER (val);
8868       CODING_ATTR_MNEMONIC (attrs) = val;
8869     }
8870   else if (EQ (prop, QCdefalut_char))
8871     {
8872       if (NILP (val))
8873         val = make_number (' ');
8874       else
8875         CHECK_CHARACTER (val);
8876       CODING_ATTR_DEFAULT_CHAR (attrs) = val;
8877     }
8878   else if (EQ (prop, QCdecode_translation_table))
8879     {
8880       if (! CHAR_TABLE_P (val) && ! CONSP (val))
8881         CHECK_SYMBOL (val);
8882       CODING_ATTR_DECODE_TBL (attrs) = val;
8883     }
8884   else if (EQ (prop, QCencode_translation_table))
8885     {
8886       if (! CHAR_TABLE_P (val) && ! CONSP (val))
8887         CHECK_SYMBOL (val);
8888       CODING_ATTR_ENCODE_TBL (attrs) = val;
8889     }
8890   else if (EQ (prop, QCpost_read_conversion))
8891     {
8892       CHECK_SYMBOL (val);
8893       CODING_ATTR_POST_READ (attrs) = val;
8894     }
8895   else if (EQ (prop, QCpre_write_conversion))
8896     {
8897       CHECK_SYMBOL (val);
8898       CODING_ATTR_PRE_WRITE (attrs) = val;
8899     }
8900
8901   CODING_ATTR_PLIST (attrs)
8902     = Fplist_put (CODING_ATTR_PLIST (attrs), prop, val);
8903   return val;
8904 }
8905
8906
8907 DEFUN ("define-coding-system-alias", Fdefine_coding_system_alias,
8908        Sdefine_coding_system_alias, 2, 2, 0,
8909        doc: /* Define ALIAS as an alias for CODING-SYSTEM.  */)
8910      (alias, coding_system)
8911      Lisp_Object alias, coding_system;
8912 {
8913   Lisp_Object spec, aliases, eol_type;
8914
8915   CHECK_SYMBOL (alias);
8916   CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
8917   aliases = AREF (spec, 1);
8918   /* ALISES should be a list of length more than zero, and the first
8919      element is a base coding system.  Append ALIAS at the tail of the
8920      list.  */
8921   while (!NILP (XCDR (aliases)))
8922     aliases = XCDR (aliases);
8923   XSETCDR (aliases, Fcons (alias, Qnil));
8924
8925   eol_type = AREF (spec, 2);
8926   if (VECTORP (eol_type))
8927     {
8928       Lisp_Object subsidiaries;
8929       int i;
8930
8931       subsidiaries = make_subsidiaries (alias);
8932       for (i = 0; i < 3; i++)
8933         Fdefine_coding_system_alias (AREF (subsidiaries, i),
8934                                      AREF (eol_type, i));
8935     }
8936
8937   Fputhash (alias, spec, Vcoding_system_hash_table);
8938   Vcoding_system_list = Fcons (alias, Vcoding_system_list);
8939   Vcoding_system_alist = Fcons (Fcons (Fsymbol_name (alias), Qnil),
8940                                 Vcoding_system_alist);
8941
8942   return Qnil;
8943 }
8944
8945 DEFUN ("coding-system-base", Fcoding_system_base, Scoding_system_base,
8946        1, 1, 0,
8947        doc: /* Return the base of CODING-SYSTEM.
8948 Any alias or subsidiary coding system is not a base coding system.  */)
8949   (coding_system)
8950      Lisp_Object coding_system;
8951 {
8952   Lisp_Object spec, attrs;
8953
8954   if (NILP (coding_system))
8955     return (Qno_conversion);
8956   CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
8957   attrs = AREF (spec, 0);
8958   return CODING_ATTR_BASE_NAME (attrs);
8959 }
8960
8961 DEFUN ("coding-system-plist", Fcoding_system_plist, Scoding_system_plist,
8962        1, 1, 0,
8963        doc: "Return the property list of CODING-SYSTEM.")
8964      (coding_system)
8965      Lisp_Object coding_system;
8966 {
8967   Lisp_Object spec, attrs;
8968
8969   if (NILP (coding_system))
8970     coding_system = Qno_conversion;
8971   CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
8972   attrs = AREF (spec, 0);
8973   return CODING_ATTR_PLIST (attrs);
8974 }
8975
8976
8977 DEFUN ("coding-system-aliases", Fcoding_system_aliases, Scoding_system_aliases,
8978        1, 1, 0,
8979        doc: /* Return the list of aliases of CODING-SYSTEM.  */)
8980      (coding_system)
8981      Lisp_Object coding_system;
8982 {
8983   Lisp_Object spec;
8984
8985   if (NILP (coding_system))
8986     coding_system = Qno_conversion;
8987   CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
8988   return AREF (spec, 1);
8989 }
8990
8991 DEFUN ("coding-system-eol-type", Fcoding_system_eol_type,
8992        Scoding_system_eol_type, 1, 1, 0,
8993        doc: /* Return eol-type of CODING-SYSTEM.
8994 An eol-type is integer 0, 1, 2, or a vector of coding systems.
8995
8996 Integer values 0, 1, and 2 indicate a format of end-of-line; LF, CRLF,
8997 and CR respectively.
8998
8999 A vector value indicates that a format of end-of-line should be
9000 detected automatically.  Nth element of the vector is the subsidiary
9001 coding system whose eol-type is N.  */)
9002      (coding_system)
9003      Lisp_Object coding_system;
9004 {
9005   Lisp_Object spec, eol_type;
9006   int n;
9007
9008   if (NILP (coding_system))
9009     coding_system = Qno_conversion;
9010   if (! CODING_SYSTEM_P (coding_system))
9011     return Qnil;
9012   spec = CODING_SYSTEM_SPEC (coding_system);
9013   eol_type = AREF (spec, 2);
9014   if (VECTORP (eol_type))
9015     return Fcopy_sequence (eol_type);
9016   n = EQ (eol_type, Qunix) ? 0 : EQ (eol_type, Qdos) ? 1 : 2;
9017   return make_number (n);
9018 }
9019
9020 #endif /* emacs */
9021
9022 \f
9023 /*** 12. Postamble ***/
9024
9025 void
9026 init_coding_once ()
9027 {
9028   int i;
9029
9030   for (i = 0; i < coding_category_max; i++)
9031     {
9032       coding_categories[i].id = -1;
9033       coding_priorities[i] = i;
9034     }
9035
9036   /* ISO2022 specific initialize routine.  */
9037   for (i = 0; i < 0x20; i++)
9038     iso_code_class[i] = ISO_control_0;
9039   for (i = 0x21; i < 0x7F; i++)
9040     iso_code_class[i] = ISO_graphic_plane_0;
9041   for (i = 0x80; i < 0xA0; i++)
9042     iso_code_class[i] = ISO_control_1;
9043   for (i = 0xA1; i < 0xFF; i++)
9044     iso_code_class[i] = ISO_graphic_plane_1;
9045   iso_code_class[0x20] = iso_code_class[0x7F] = ISO_0x20_or_0x7F;
9046   iso_code_class[0xA0] = iso_code_class[0xFF] = ISO_0xA0_or_0xFF;
9047   iso_code_class[ISO_CODE_SO] = ISO_shift_out;
9048   iso_code_class[ISO_CODE_SI] = ISO_shift_in;
9049   iso_code_class[ISO_CODE_SS2_7] = ISO_single_shift_2_7;
9050   iso_code_class[ISO_CODE_ESC] = ISO_escape;
9051   iso_code_class[ISO_CODE_SS2] = ISO_single_shift_2;
9052   iso_code_class[ISO_CODE_SS3] = ISO_single_shift_3;
9053   iso_code_class[ISO_CODE_CSI] = ISO_control_sequence_introducer;
9054
9055   for (i = 0; i < 256; i++)
9056     {
9057       emacs_mule_bytes[i] = 1;
9058     }
9059   emacs_mule_bytes[EMACS_MULE_LEADING_CODE_PRIVATE_11] = 3;
9060   emacs_mule_bytes[EMACS_MULE_LEADING_CODE_PRIVATE_12] = 3;
9061   emacs_mule_bytes[EMACS_MULE_LEADING_CODE_PRIVATE_21] = 4;
9062   emacs_mule_bytes[EMACS_MULE_LEADING_CODE_PRIVATE_22] = 4;
9063 }
9064
9065 #ifdef emacs
9066
9067 void
9068 syms_of_coding ()
9069 {
9070   staticpro (&Vcoding_system_hash_table);
9071   {
9072     Lisp_Object args[2];
9073     args[0] = QCtest;
9074     args[1] = Qeq;
9075     Vcoding_system_hash_table = Fmake_hash_table (2, args);
9076   }
9077
9078   staticpro (&Vsjis_coding_system);
9079   Vsjis_coding_system = Qnil;
9080
9081   staticpro (&Vbig5_coding_system);
9082   Vbig5_coding_system = Qnil;
9083
9084   staticpro (&Vcode_conversion_reused_workbuf);
9085   Vcode_conversion_reused_workbuf = Qnil;
9086
9087   staticpro (&Vcode_conversion_workbuf_name);
9088   Vcode_conversion_workbuf_name = build_string (" *code-conversion-work*");
9089
9090   reused_workbuf_in_use = 0;
9091
9092   DEFSYM (Qcharset, "charset");
9093   DEFSYM (Qtarget_idx, "target-idx");
9094   DEFSYM (Qcoding_system_history, "coding-system-history");
9095   Fset (Qcoding_system_history, Qnil);
9096
9097   /* Target FILENAME is the first argument.  */
9098   Fput (Qinsert_file_contents, Qtarget_idx, make_number (0));
9099   /* Target FILENAME is the third argument.  */
9100   Fput (Qwrite_region, Qtarget_idx, make_number (2));
9101
9102   DEFSYM (Qcall_process, "call-process");
9103   /* Target PROGRAM is the first argument.  */
9104   Fput (Qcall_process, Qtarget_idx, make_number (0));
9105
9106   DEFSYM (Qcall_process_region, "call-process-region");
9107   /* Target PROGRAM is the third argument.  */
9108   Fput (Qcall_process_region, Qtarget_idx, make_number (2));
9109
9110   DEFSYM (Qstart_process, "start-process");
9111   /* Target PROGRAM is the third argument.  */
9112   Fput (Qstart_process, Qtarget_idx, make_number (2));
9113
9114   DEFSYM (Qopen_network_stream, "open-network-stream");
9115   /* Target SERVICE is the fourth argument.  */
9116   Fput (Qopen_network_stream, Qtarget_idx, make_number (3));
9117
9118   DEFSYM (Qcoding_system, "coding-system");
9119   DEFSYM (Qcoding_aliases, "coding-aliases");
9120
9121   DEFSYM (Qeol_type, "eol-type");
9122   DEFSYM (Qunix, "unix");
9123   DEFSYM (Qdos, "dos");
9124
9125   DEFSYM (Qbuffer_file_coding_system, "buffer-file-coding-system");
9126   DEFSYM (Qpost_read_conversion, "post-read-conversion");
9127   DEFSYM (Qpre_write_conversion, "pre-write-conversion");
9128   DEFSYM (Qdefault_char, "default-char");
9129   DEFSYM (Qundecided, "undecided");
9130   DEFSYM (Qno_conversion, "no-conversion");
9131   DEFSYM (Qraw_text, "raw-text");
9132
9133   DEFSYM (Qiso_2022, "iso-2022");
9134
9135   DEFSYM (Qutf_8, "utf-8");
9136   DEFSYM (Qutf_8_emacs, "utf-8-emacs");
9137
9138   DEFSYM (Qutf_16, "utf-16");
9139   DEFSYM (Qbig, "big");
9140   DEFSYM (Qlittle, "little");
9141
9142   DEFSYM (Qshift_jis, "shift-jis");
9143   DEFSYM (Qbig5, "big5");
9144
9145   DEFSYM (Qcoding_system_p, "coding-system-p");
9146
9147   DEFSYM (Qcoding_system_error, "coding-system-error");
9148   Fput (Qcoding_system_error, Qerror_conditions,
9149         Fcons (Qcoding_system_error, Fcons (Qerror, Qnil)));
9150   Fput (Qcoding_system_error, Qerror_message,
9151         build_string ("Invalid coding system"));
9152
9153   /* Intern this now in case it isn't already done.
9154      Setting this variable twice is harmless.
9155      But don't staticpro it here--that is done in alloc.c.  */
9156   Qchar_table_extra_slots = intern ("char-table-extra-slots");
9157
9158   DEFSYM (Qtranslation_table, "translation-table");
9159   Fput (Qtranslation_table, Qchar_table_extra_slots, make_number (2));
9160   DEFSYM (Qtranslation_table_id, "translation-table-id");
9161   DEFSYM (Qtranslation_table_for_decode, "translation-table-for-decode");
9162   DEFSYM (Qtranslation_table_for_encode, "translation-table-for-encode");
9163
9164   DEFSYM (Qvalid_codes, "valid-codes");
9165
9166   DEFSYM (Qemacs_mule, "emacs-mule");
9167
9168   DEFSYM (QCcategory, ":category");
9169   DEFSYM (QCmnemonic, ":mnemonic");
9170   DEFSYM (QCdefalut_char, ":default-char");
9171   DEFSYM (QCdecode_translation_table, ":decode-translation-table");
9172   DEFSYM (QCencode_translation_table, ":encode-translation-table");
9173   DEFSYM (QCpost_read_conversion, ":post-read-conversion");
9174   DEFSYM (QCpre_write_conversion, ":pre-write-conversion");
9175
9176   Vcoding_category_table
9177     = Fmake_vector (make_number (coding_category_max), Qnil);
9178   staticpro (&Vcoding_category_table);
9179   /* Followings are target of code detection.  */
9180   ASET (Vcoding_category_table, coding_category_iso_7,
9181         intern ("coding-category-iso-7"));
9182   ASET (Vcoding_category_table, coding_category_iso_7_tight,
9183         intern ("coding-category-iso-7-tight"));
9184   ASET (Vcoding_category_table, coding_category_iso_8_1,
9185         intern ("coding-category-iso-8-1"));
9186   ASET (Vcoding_category_table, coding_category_iso_8_2,
9187         intern ("coding-category-iso-8-2"));
9188   ASET (Vcoding_category_table, coding_category_iso_7_else,
9189         intern ("coding-category-iso-7-else"));
9190   ASET (Vcoding_category_table, coding_category_iso_8_else,
9191         intern ("coding-category-iso-8-else"));
9192   ASET (Vcoding_category_table, coding_category_utf_8,
9193         intern ("coding-category-utf-8"));
9194   ASET (Vcoding_category_table, coding_category_utf_16_be,
9195         intern ("coding-category-utf-16-be"));
9196   ASET (Vcoding_category_table, coding_category_utf_16_auto,
9197         intern ("coding-category-utf-16-auto"));
9198   ASET (Vcoding_category_table, coding_category_utf_16_le,
9199         intern ("coding-category-utf-16-le"));
9200   ASET (Vcoding_category_table, coding_category_utf_16_be_nosig,
9201         intern ("coding-category-utf-16-be-nosig"));
9202   ASET (Vcoding_category_table, coding_category_utf_16_le_nosig,
9203         intern ("coding-category-utf-16-le-nosig"));
9204   ASET (Vcoding_category_table, coding_category_charset,
9205         intern ("coding-category-charset"));
9206   ASET (Vcoding_category_table, coding_category_sjis,
9207         intern ("coding-category-sjis"));
9208   ASET (Vcoding_category_table, coding_category_big5,
9209         intern ("coding-category-big5"));
9210   ASET (Vcoding_category_table, coding_category_ccl,
9211         intern ("coding-category-ccl"));
9212   ASET (Vcoding_category_table, coding_category_emacs_mule,
9213         intern ("coding-category-emacs-mule"));
9214   /* Followings are NOT target of code detection.  */
9215   ASET (Vcoding_category_table, coding_category_raw_text,
9216         intern ("coding-category-raw-text"));
9217   ASET (Vcoding_category_table, coding_category_undecided,
9218         intern ("coding-category-undecided"));
9219
9220   DEFSYM (Qinsufficient_source, "insufficient-source");
9221   DEFSYM (Qinconsistent_eol, "inconsistent-eol");
9222   DEFSYM (Qinvalid_source, "invalid-source");
9223   DEFSYM (Qinterrupted, "interrupted");
9224   DEFSYM (Qinsufficient_memory, "insufficient-memory");
9225
9226   defsubr (&Scoding_system_p);
9227   defsubr (&Sread_coding_system);
9228   defsubr (&Sread_non_nil_coding_system);
9229   defsubr (&Scheck_coding_system);
9230   defsubr (&Sdetect_coding_region);
9231   defsubr (&Sdetect_coding_string);
9232   defsubr (&Sfind_coding_systems_region_internal);
9233   defsubr (&Sunencodable_char_position);
9234   defsubr (&Scheck_coding_systems_region);
9235   defsubr (&Sdecode_coding_region);
9236   defsubr (&Sencode_coding_region);
9237   defsubr (&Sdecode_coding_string);
9238   defsubr (&Sencode_coding_string);
9239   defsubr (&Sdecode_sjis_char);
9240   defsubr (&Sencode_sjis_char);
9241   defsubr (&Sdecode_big5_char);
9242   defsubr (&Sencode_big5_char);
9243   defsubr (&Sset_terminal_coding_system_internal);
9244   defsubr (&Sset_safe_terminal_coding_system_internal);
9245   defsubr (&Sterminal_coding_system);
9246   defsubr (&Sset_keyboard_coding_system_internal);
9247   defsubr (&Skeyboard_coding_system);
9248   defsubr (&Sfind_operation_coding_system);
9249   defsubr (&Sset_coding_system_priority);
9250   defsubr (&Sdefine_coding_system_internal);
9251   defsubr (&Sdefine_coding_system_alias);
9252   defsubr (&Scoding_system_put);
9253   defsubr (&Scoding_system_base);
9254   defsubr (&Scoding_system_plist);
9255   defsubr (&Scoding_system_aliases);
9256   defsubr (&Scoding_system_eol_type);
9257   defsubr (&Scoding_system_priority_list);
9258
9259   DEFVAR_LISP ("coding-system-list", &Vcoding_system_list,
9260                doc: /* List of coding systems.
9261
9262 Do not alter the value of this variable manually.  This variable should be
9263 updated by the functions `define-coding-system' and
9264 `define-coding-system-alias'.  */);
9265   Vcoding_system_list = Qnil;
9266
9267   DEFVAR_LISP ("coding-system-alist", &Vcoding_system_alist,
9268                doc: /* Alist of coding system names.
9269 Each element is one element list of coding system name.
9270 This variable is given to `completing-read' as TABLE argument.
9271
9272 Do not alter the value of this variable manually.  This variable should be
9273 updated by the functions `make-coding-system' and
9274 `define-coding-system-alias'.  */);
9275   Vcoding_system_alist = Qnil;
9276
9277   DEFVAR_LISP ("coding-category-list", &Vcoding_category_list,
9278                doc: /* List of coding-categories (symbols) ordered by priority.
9279
9280 On detecting a coding system, Emacs tries code detection algorithms
9281 associated with each coding-category one by one in this order.  When
9282 one algorithm agrees with a byte sequence of source text, the coding
9283 system bound to the corresponding coding-category is selected.  */);
9284   {
9285     int i;
9286
9287     Vcoding_category_list = Qnil;
9288     for (i = coding_category_max - 1; i >= 0; i--)
9289       Vcoding_category_list
9290         = Fcons (XVECTOR (Vcoding_category_table)->contents[i],
9291                  Vcoding_category_list);
9292   }
9293
9294   DEFVAR_LISP ("coding-system-for-read", &Vcoding_system_for_read,
9295                doc: /* Specify the coding system for read operations.
9296 It is useful to bind this variable with `let', but do not set it globally.
9297 If the value is a coding system, it is used for decoding on read operation.
9298 If not, an appropriate element is used from one of the coding system alists:
9299 There are three such tables, `file-coding-system-alist',
9300 `process-coding-system-alist', and `network-coding-system-alist'.  */);
9301   Vcoding_system_for_read = Qnil;
9302
9303   DEFVAR_LISP ("coding-system-for-write", &Vcoding_system_for_write,
9304                doc: /* Specify the coding system for write operations.
9305 Programs bind this variable with `let', but you should not set it globally.
9306 If the value is a coding system, it is used for encoding of output,
9307 when writing it to a file and when sending it to a file or subprocess.
9308
9309 If this does not specify a coding system, an appropriate element
9310 is used from one of the coding system alists:
9311 There are three such tables, `file-coding-system-alist',
9312 `process-coding-system-alist', and `network-coding-system-alist'.
9313 For output to files, if the above procedure does not specify a coding system,
9314 the value of `buffer-file-coding-system' is used.  */);
9315   Vcoding_system_for_write = Qnil;
9316
9317   DEFVAR_LISP ("last-coding-system-used", &Vlast_coding_system_used,
9318                doc: /*
9319 Coding system used in the latest file or process I/O.  */);
9320   Vlast_coding_system_used = Qnil;
9321
9322   DEFVAR_LISP ("last-code-conversion-error", &Vlast_code_conversion_error,
9323                doc: /*
9324 Error status of the last code conversion.
9325
9326 When an error was detected in the last code conversion, this variable
9327 is set to one of the following symbols.
9328   `insufficient-source'
9329   `inconsistent-eol'
9330   `invalid-source'
9331   `interrupted'
9332   `insufficient-memory'
9333 When no error was detected, the value doesn't change.  So, to check
9334 the error status of a code conversion by this variable, you must
9335 explicitly set this variable to nil before performing code
9336 conversion.  */);
9337   Vlast_code_conversion_error = Qnil;
9338
9339   DEFVAR_BOOL ("inhibit-eol-conversion", &inhibit_eol_conversion,
9340                doc: /*
9341 *Non-nil means always inhibit code conversion of end-of-line format.
9342 See info node `Coding Systems' and info node `Text and Binary' concerning
9343 such conversion.  */);
9344   inhibit_eol_conversion = 0;
9345
9346   DEFVAR_BOOL ("inherit-process-coding-system", &inherit_process_coding_system,
9347                doc: /*
9348 Non-nil means process buffer inherits coding system of process output.
9349 Bind it to t if the process output is to be treated as if it were a file
9350 read from some filesystem.  */);
9351   inherit_process_coding_system = 0;
9352
9353   DEFVAR_LISP ("file-coding-system-alist", &Vfile_coding_system_alist,
9354                doc: /*
9355 Alist to decide a coding system to use for a file I/O operation.
9356 The format is ((PATTERN . VAL) ...),
9357 where PATTERN is a regular expression matching a file name,
9358 VAL is a coding system, a cons of coding systems, or a function symbol.
9359 If VAL is a coding system, it is used for both decoding and encoding
9360 the file contents.
9361 If VAL is a cons of coding systems, the car part is used for decoding,
9362 and the cdr part is used for encoding.
9363 If VAL is a function symbol, the function must return a coding system
9364 or a cons of coding systems which are used as above.  The function gets
9365 the arguments with which `find-operation-coding-systems' was called.
9366
9367 See also the function `find-operation-coding-system'
9368 and the variable `auto-coding-alist'.  */);
9369   Vfile_coding_system_alist = Qnil;
9370
9371   DEFVAR_LISP ("process-coding-system-alist", &Vprocess_coding_system_alist,
9372                doc: /*
9373 Alist to decide a coding system to use for a process I/O operation.
9374 The format is ((PATTERN . VAL) ...),
9375 where PATTERN is a regular expression matching a program name,
9376 VAL is a coding system, a cons of coding systems, or a function symbol.
9377 If VAL is a coding system, it is used for both decoding what received
9378 from the program and encoding what sent to the program.
9379 If VAL is a cons of coding systems, the car part is used for decoding,
9380 and the cdr part is used for encoding.
9381 If VAL is a function symbol, the function must return a coding system
9382 or a cons of coding systems which are used as above.
9383
9384 See also the function `find-operation-coding-system'.  */);
9385   Vprocess_coding_system_alist = Qnil;
9386
9387   DEFVAR_LISP ("network-coding-system-alist", &Vnetwork_coding_system_alist,
9388                doc: /*
9389 Alist to decide a coding system to use for a network I/O operation.
9390 The format is ((PATTERN . VAL) ...),
9391 where PATTERN is a regular expression matching a network service name
9392 or is a port number to connect to,
9393 VAL is a coding system, a cons of coding systems, or a function symbol.
9394 If VAL is a coding system, it is used for both decoding what received
9395 from the network stream and encoding what sent to the network stream.
9396 If VAL is a cons of coding systems, the car part is used for decoding,
9397 and the cdr part is used for encoding.
9398 If VAL is a function symbol, the function must return a coding system
9399 or a cons of coding systems which are used as above.
9400
9401 See also the function `find-operation-coding-system'.  */);
9402   Vnetwork_coding_system_alist = Qnil;
9403
9404   DEFVAR_LISP ("locale-coding-system", &Vlocale_coding_system,
9405                doc: /* Coding system to use with system messages.
9406 Also used for decoding keyboard input on X Window system.  */);
9407   Vlocale_coding_system = Qnil;
9408
9409   /* The eol mnemonics are reset in startup.el system-dependently.  */
9410   DEFVAR_LISP ("eol-mnemonic-unix", &eol_mnemonic_unix,
9411                doc: /*
9412 *String displayed in mode line for UNIX-like (LF) end-of-line format.  */);
9413   eol_mnemonic_unix = build_string (":");
9414
9415   DEFVAR_LISP ("eol-mnemonic-dos", &eol_mnemonic_dos,
9416                doc: /*
9417 *String displayed in mode line for DOS-like (CRLF) end-of-line format.  */);
9418   eol_mnemonic_dos = build_string ("\\");
9419
9420   DEFVAR_LISP ("eol-mnemonic-mac", &eol_mnemonic_mac,
9421                doc: /*
9422 *String displayed in mode line for MAC-like (CR) end-of-line format.  */);
9423   eol_mnemonic_mac = build_string ("/");
9424
9425   DEFVAR_LISP ("eol-mnemonic-undecided", &eol_mnemonic_undecided,
9426                doc: /*
9427 *String displayed in mode line when end-of-line format is not yet determined.  */);
9428   eol_mnemonic_undecided = build_string (":");
9429
9430   DEFVAR_LISP ("enable-character-translation", &Venable_character_translation,
9431                doc: /*
9432 *Non-nil enables character translation while encoding and decoding.  */);
9433   Venable_character_translation = Qt;
9434
9435   DEFVAR_LISP ("standard-translation-table-for-decode",
9436                &Vstandard_translation_table_for_decode,
9437                doc: /* Table for translating characters while decoding.  */);
9438   Vstandard_translation_table_for_decode = Qnil;
9439
9440   DEFVAR_LISP ("standard-translation-table-for-encode",
9441                &Vstandard_translation_table_for_encode,
9442                doc: /* Table for translating characters while encoding.  */);
9443   Vstandard_translation_table_for_encode = Qnil;
9444
9445   DEFVAR_LISP ("charset-revision-table", &Vcharset_revision_table,
9446                doc: /* Alist of charsets vs revision numbers.
9447 While encoding, if a charset (car part of an element) is found,
9448 designate it with the escape sequence identifying revision (cdr part
9449 of the element).  */);
9450   Vcharset_revision_table = Qnil;
9451
9452   DEFVAR_LISP ("default-process-coding-system",
9453                &Vdefault_process_coding_system,
9454                doc: /* Cons of coding systems used for process I/O by default.
9455 The car part is used for decoding a process output,
9456 the cdr part is used for encoding a text to be sent to a process.  */);
9457   Vdefault_process_coding_system = Qnil;
9458
9459   DEFVAR_LISP ("latin-extra-code-table", &Vlatin_extra_code_table,
9460                doc: /*
9461 Table of extra Latin codes in the range 128..159 (inclusive).
9462 This is a vector of length 256.
9463 If Nth element is non-nil, the existence of code N in a file
9464 \(or output of subprocess) doesn't prevent it to be detected as
9465 a coding system of ISO 2022 variant which has a flag
9466 `accept-latin-extra-code' t (e.g. iso-latin-1) on reading a file
9467 or reading output of a subprocess.
9468 Only 128th through 159th elements has a meaning.  */);
9469   Vlatin_extra_code_table = Fmake_vector (make_number (256), Qnil);
9470
9471   DEFVAR_LISP ("select-safe-coding-system-function",
9472                &Vselect_safe_coding_system_function,
9473                doc: /*
9474 Function to call to select safe coding system for encoding a text.
9475
9476 If set, this function is called to force a user to select a proper
9477 coding system which can encode the text in the case that a default
9478 coding system used in each operation can't encode the text.
9479
9480 The default value is `select-safe-coding-system' (which see).  */);
9481   Vselect_safe_coding_system_function = Qnil;
9482
9483   DEFVAR_BOOL ("coding-system-require-warning",
9484                &coding_system_require_warning,
9485                doc: /* Internal use only.
9486 If non-nil, on writing a file, `select-safe-coding-system-function' is
9487 called even if `coding-system-for-write' is non-nil.  The command
9488 `universal-coding-system-argument' binds this variable to t temporarily.  */);
9489   coding_system_require_warning = 0;
9490
9491
9492   DEFVAR_BOOL ("inhibit-iso-escape-detection",
9493                &inhibit_iso_escape_detection,
9494                doc: /*
9495 If non-nil, Emacs ignores ISO2022's escape sequence on code detection.
9496
9497 By default, on reading a file, Emacs tries to detect how the text is
9498 encoded.  This code detection is sensitive to escape sequences.  If
9499 the sequence is valid as ISO2022, the code is determined as one of
9500 the ISO2022 encodings, and the file is decoded by the corresponding
9501 coding system (e.g. `iso-2022-7bit').
9502
9503 However, there may be a case that you want to read escape sequences in
9504 a file as is.  In such a case, you can set this variable to non-nil.
9505 Then, as the code detection ignores any escape sequences, no file is
9506 detected as encoded in some ISO2022 encoding.  The result is that all
9507 escape sequences become visible in a buffer.
9508
9509 The default value is nil, and it is strongly recommended not to change
9510 it.  That is because many Emacs Lisp source files that contain
9511 non-ASCII characters are encoded by the coding system `iso-2022-7bit'
9512 in Emacs's distribution, and they won't be decoded correctly on
9513 reading if you suppress escape sequence detection.
9514
9515 The other way to read escape sequences in a file without decoding is
9516 to explicitly specify some coding system that doesn't use ISO2022's
9517 escape sequence (e.g `latin-1') on reading by \\[universal-coding-system-argument].  */);
9518   inhibit_iso_escape_detection = 0;
9519
9520   DEFVAR_LISP ("translation-table-for-input", &Vtranslation_table_for_input,
9521                doc: /* Char table for translating self-inserting characters.
9522 This is applied to the result of input methods, not their input.  See also
9523 `keyboard-translate-table'.  */);
9524     Vtranslation_table_for_input = Qnil;
9525
9526   {
9527     Lisp_Object args[coding_arg_max];
9528     Lisp_Object plist[16];
9529     int i;
9530
9531     for (i = 0; i < coding_arg_max; i++)
9532       args[i] = Qnil;
9533
9534     plist[0] = intern (":name");
9535     plist[1] = args[coding_arg_name] = Qno_conversion;
9536     plist[2] = intern (":mnemonic");
9537     plist[3] = args[coding_arg_mnemonic] = make_number ('=');
9538     plist[4] = intern (":coding-type");
9539     plist[5] = args[coding_arg_coding_type] = Qraw_text;
9540     plist[6] = intern (":ascii-compatible-p");
9541     plist[7] = args[coding_arg_ascii_compatible_p] = Qt;
9542     plist[8] = intern (":default-char");
9543     plist[9] = args[coding_arg_default_char] = make_number (0);
9544     plist[10] = intern (":for-unibyte");
9545     plist[11] = args[coding_arg_for_unibyte] = Qt;
9546     plist[12] = intern (":docstring");
9547     plist[13] = build_string ("Do no conversion.\n\
9548 \n\
9549 When you visit a file with this coding, the file is read into a\n\
9550 unibyte buffer as is, thus each byte of a file is treated as a\n\
9551 character.");
9552     plist[14] = intern (":eol-type");
9553     plist[15] = args[coding_arg_eol_type] = Qunix;
9554     args[coding_arg_plist] = Flist (16, plist);
9555     Fdefine_coding_system_internal (coding_arg_max, args);
9556   }
9557
9558   setup_coding_system (Qno_conversion, &keyboard_coding);
9559   setup_coding_system (Qno_conversion, &terminal_coding);
9560   setup_coding_system (Qno_conversion, &safe_terminal_coding);
9561
9562   {
9563     int i;
9564
9565     for (i = 0; i < coding_category_max; i++)
9566       Fset (AREF (Vcoding_category_table, i), Qno_conversion);
9567   }
9568 }
9569
9570 char *
9571 emacs_strerror (error_number)
9572      int error_number;
9573 {
9574   char *str;
9575
9576   synchronize_system_messages_locale ();
9577   str = strerror (error_number);
9578
9579   if (! NILP (Vlocale_coding_system))
9580     {
9581       Lisp_Object dec = code_convert_string_norecord (build_string (str),
9582                                                       Vlocale_coding_system,
9583                                                       0);
9584       str = (char *) SDATA (dec);
9585     }
9586
9587   return str;
9588 }
9589
9590 #endif /* emacs */
9591
9592 /* arch-tag: 3a3a2b01-5ff6-4071-9afe-f5b808d9229d
9593    (do not change this comment) */