Add arch taglines

[bpt/emacs.git] / src / coding.c
diff --git a/src/coding.c b/src/coding.c

index 854e2b0..6d36cc3 100644 (file)
--- a/src/coding.c
+++ b/src/coding.c
@@ -1,6 +1,7 @@
  /* Coding system handler (conversion, detection, and etc).
-   Copyright (C) 1995, 1997, 1998 Electrotechnical Laboratory, JAPAN.
+   Copyright (C) 1995,97,1998,2002,2003  Electrotechnical Laboratory, JAPAN.
     Licensed to the Free Software Foundation.
+   Copyright (C) 2001,2002,2003  Free Software Foundation, Inc.
  
  This file is part of GNU Emacs.
  
@@ -37,18 +38,18 @@ Boston, MA 02111-1307, USA.  */
  /*** 0. General comments ***/
  
  
-/*** GENERAL NOTE on CODING SYSTEM ***
+/*** GENERAL NOTE on CODING SYSTEMS ***
  
-  Coding system is an encoding mechanism of one or more character
+  A coding system is an encoding mechanism for one or more character
    sets.  Here's a list of coding systems which Emacs can handle.  When
    we say "decode", it means converting some other coding system to
-  Emacs' internal format (emacs-internal), and when we say "encode",
+  Emacs' internal format (emacs-mule), and when we say "encode",
    it means converting the coding system emacs-mule to some other
    coding system.
  
    0. Emacs' internal format (emacs-mule)
  
-  Emacs itself holds a multi-lingual character in a buffer and a string
+  Emacs itself holds a multi-lingual character in buffers and strings
    in a special format.  Details are described in section 2.
  
    1. ISO2022
@@ -59,28 +60,28 @@ Boston, MA 02111-1307, USA.  */
    all variants of ISO2022.  Details are described in section 3.
  
    2. SJIS (or Shift-JIS or MS-Kanji-Code)
-   
+
    A coding system to encode character sets: ASCII, JISX0201, and
    JISX0208.  Widely used for PC's in Japan.  Details are described in
    section 4.
  
    3. BIG5
  
-  A coding system to encode character sets: ASCII and Big5.  Widely
-  used by Chinese (mainly in Taiwan and Hong Kong).  Details are
+  A coding system to encode the character sets ASCII and Big5.  Widely
+  used for Chinese (mainly in Taiwan and Hong Kong).  Details are
    described in section 4.  In this file, when we write "BIG5"
    (all uppercase), we mean the coding system, and when we write
    "Big5" (capitalized), we mean the character set.
  
    4. Raw text
  
-  A coding system for a text containing random 8-bit code.  Emacs does
-  no code conversion on such a text except for end-of-line format.
+  A coding system for text containing random 8-bit code.  Emacs does
+  no code conversion on such text except for end-of-line format.
  
    5. Other
  
-  If a user wants to read/write a text encoded in a coding system not
-  listed above, he can supply a decoder and an encoder for it in CCL
+  If a user wants to read/write text encoded in a coding system not
+  listed above, he can supply a decoder and an encoder for it as CCL
    (Code Conversion Language) programs.  Emacs executes the CCL program
    while reading/writing.
  
@@ -93,16 +94,16 @@ Boston, MA 02111-1307, USA.  */
  
  /*** GENERAL NOTES on END-OF-LINE FORMAT ***
  
-  How end-of-line of a text is encoded depends on a system.  For
-  instance, Unix's format is just one byte of `line-feed' code,
+  How end-of-line of text is encoded depends on the operating system.
+  For instance, Unix's format is just one byte of `line-feed' code,
    whereas DOS's format is two-byte sequence of `carriage-return' and
    `line-feed' codes.  MacOS's format is usually one byte of
    `carriage-return'.
  
-  Since text characters encoding and end-of-line encoding are
-  independent, any coding system described above can take
-  any format of end-of-line.  So, Emacs has information of format of
-  end-of-line in each coding-system.  See section 6 for more details.
+  Since text character encoding and end-of-line encoding are
+  independent, any coding system described above can have any
+  end-of-line format.  So Emacs has information about end-of-line
+  format in each coding-system.  See section 6 for more details.
  
  */
  
@@ -110,9 +111,9 @@ Boston, MA 02111-1307, USA.  */
  
    These functions check if a text between SRC and SRC_END is encoded
    in the coding system category XXX.  Each returns an integer value in
-  which appropriate flag bits for the category XXX is set.  The flag
+  which appropriate flag bits for the category XXX are set.  The flag
    bits are defined in macros CODING_CATEGORY_MASK_XXX.  Below is the
-  template of these functions.  If MULTIBYTEP is nonzero, 8-bit codes
+  template for these functions.  If MULTIBYTEP is nonzero, 8-bit codes
    of the range 0x80..0x9F are in multibyte form.  */
  #if 0
  int
@@ -131,16 +132,17 @@ detect_coding_emacs_mule (src, src_end, multibytep)
    multibyte text goes to a place pointed to by DESTINATION, the length
    of which should not exceed DST_BYTES.
  
-  These functions set the information of original and decoded texts in
-  the members produced, produced_char, consumed, and consumed_char of
-  the structure *CODING.  They also set the member result to one of
-  CODING_FINISH_XXX indicating how the decoding finished.
+  These functions set the information about original and decoded texts
+  in the members `produced', `produced_char', `consumed', and
+  `consumed_char' of the structure *CODING.  They also set the member
+  `result' to one of CODING_FINISH_XXX indicating how the decoding
+  finished.
  
-  DST_BYTES zero means that source area and destination area are
+  DST_BYTES zero means that the source area and destination area are
    overlapped, which means that we can produce a decoded text until it
-  reaches at the head of not-yet-decoded source text.
+  reaches the head of the not-yet-decoded source text.
  
-  Below is a template of these functions.  */
+  Below is a template for these functions.  */
  #if 0
  static void
  decode_coding_XXX (coding, source, destination, src_bytes, dst_bytes)
@@ -154,21 +156,22 @@ decode_coding_XXX (coding, source, destination, src_bytes, dst_bytes)
  
  /*** GENERAL NOTES on `encode_coding_XXX ()' functions ***
  
-  These functions encode SRC_BYTES length text at SOURCE of Emacs'
+  These functions encode SRC_BYTES length text at SOURCE from Emacs'
    internal multibyte format to CODING.  The resulting unibyte text
    goes to a place pointed to by DESTINATION, the length of which
    should not exceed DST_BYTES.
  
-  These functions set the information of original and encoded texts in
-  the members produced, produced_char, consumed, and consumed_char of
-  the structure *CODING.  They also set the member result to one of
-  CODING_FINISH_XXX indicating how the encoding finished.
+  These functions set the information about original and encoded texts
+  in the members `produced', `produced_char', `consumed', and
+  `consumed_char' of the structure *CODING.  They also set the member
+  `result' to one of CODING_FINISH_XXX indicating how the encoding
+  finished.
  
-  DST_BYTES zero means that source area and destination area are
-  overlapped, which means that we can produce a encoded text until it
-  reaches at the head of not-yet-encoded source text.
+  DST_BYTES zero means that the source area and destination area are
+  overlapped, which means that we can produce encoded text until it
+  reaches the head of the not-yet-encoded source text.
  
-  Below is a template of these functions.  */
+  Below is a template for these functions.  */
  #if 0
  static void
  encode_coding_XXX (coding, source, destination, src_bytes, dst_bytes)
@@ -257,10 +260,10 @@ encode_coding_XXX (coding, source, destination, src_bytes, dst_bytes)
    } while (0)
  
  
-/* Produce a multibyte form of characater C to `dst'.  Jump to
+/* Produce a multibyte form of character C to `dst'.  Jump to
     `label_end_of_loop' if there's not enough space at `dst'.
  
-   If we are now in the middle of composition sequence, the decoded
+   If we are now in the middle of a composition sequence, the decoded
     character may be ALTCHAR (for the current composition).  In that
     case, the character goes to coding->cmp_data->data instead of
     `dst'.
@@ -342,6 +345,7 @@ encode_coding_XXX (coding, source, destination, src_bytes, dst_bytes)
  #include "ccl.h"
  #include "coding.h"
  #include "window.h"
+#include "intervals.h"
  
  #else  /* not emacs */
  
@@ -364,6 +368,8 @@ Lisp_Object Qtarget_idx;
  
  Lisp_Object Vselect_safe_coding_system_function;
  
+int coding_system_require_warning;
+
  /* Mnemonic string for each format of end-of-line.  */
  Lisp_Object eol_mnemonic_unix, eol_mnemonic_dos, eol_mnemonic_mac;
  /* Mnemonic string to indicate format of end-of-line is not yet
@@ -376,6 +382,16 @@ int system_eol_type;
  
  #ifdef emacs
  
+/* Information about which coding system is safe for which chars.
+   The value has the form (GENERIC-LIST . NON-GENERIC-ALIST).
+
+   GENERIC-LIST is a list of generic coding systems which can encode
+   any characters.
+
+   NON-GENERIC-ALIST is an alist of non-generic coding systems vs the
+   corresponding char table that contains safe chars.  */
+Lisp_Object Vcoding_system_safe_chars;
+
  Lisp_Object Vcoding_system_list, Vcoding_system_alist;
  
  Lisp_Object Qcoding_system_p, Qcoding_system_error;
@@ -384,6 +400,8 @@ Lisp_Object Qcoding_system_p, Qcoding_system_error;
     end-of-line format.  */
  Lisp_Object Qemacs_mule, Qraw_text;
  
+Lisp_Object Qutf_8;
+
  /* Coding-systems are handed between Emacs Lisp programs and C internal
     routines by the following three variables.  */
  /* Coding-system for reading files and receiving data from process.  */
@@ -459,7 +477,7 @@ char *coding_category_name[CODING_CATEGORY_IDX_MAX] = {
  struct coding_system *coding_system_table[CODING_CATEGORY_IDX_MAX];
  
  /* Table of coding category masks.  Nth element is a mask for a coding
-   cateogry of which priority is Nth.  */
+   category of which priority is Nth.  */
  static
  int coding_priorities[CODING_CATEGORY_IDX_MAX];
  
@@ -482,26 +500,27 @@ Lisp_Object Vcharset_revision_alist;
  /* Default coding systems used for process I/O.  */
  Lisp_Object Vdefault_process_coding_system;
  
+/* Char table for translating Quail and self-inserting input.  */
+Lisp_Object Vtranslation_table_for_input;
+
  /* Global flag to tell that we can't call post-read-conversion and
     pre-write-conversion functions.  Usually the value is zero, but it
     is set to 1 temporarily while such functions are running.  This is
     to avoid infinite recursive call.  */
  static int inhibit_pre_post_conversion;
  
-/* Char-table containing safe coding systems of each character.  */
-Lisp_Object Vchar_coding_system_table;
  Lisp_Object Qchar_coding_system;
  
-/* Return `safe-chars' property of coding system CODING.  Don't check
-   validity of CODING.  */
+/* Return `safe-chars' property of CODING_SYSTEM (symbol).  Don't check
+   its validity.  */
  
  Lisp_Object
-coding_safe_chars (coding)
-     struct coding_system *coding;
+coding_safe_chars (coding_system)
+     Lisp_Object coding_system;
  {
    Lisp_Object coding_spec, plist, safe_chars;
-  
-  coding_spec = Fget (coding->symbol, Qcoding_system);
+
+  coding_spec = Fget (coding_system, Qcoding_system);
    plist = XVECTOR (coding_spec)->contents[3];
    safe_chars = Fplist_get (XVECTOR (coding_spec)->contents[3], Qsafe_chars);
    return (CHAR_TABLE_P (safe_chars) ? safe_chars : Qt);
@@ -553,7 +572,7 @@ coding_safe_chars (coding)
     In that case, a sequence of one-byte codes has a slightly different
     form.
  
-   At first, all characters in eight-bit-control are represented by
+   Firstly, all characters in eight-bit-control are represented by
     one-byte sequences which are their 8-bit code.
  
     Next, character composition data are represented by the byte
@@ -562,12 +581,12 @@ coding_safe_chars (coding)
         METHOD is 0xF0 plus one of composition method (enum
         composition_method),
  
-       BYTES is 0x20 plus a byte length of this composition data,
+       BYTES is 0xA0 plus the byte length of these composition data,
  
-       CHARS is 0x20 plus a number of characters composed by this
+       CHARS is 0xA0 plus the number of characters composed by these
         data,
  
-       COMPONENTs are characters of multibye form or composition
+       COMPONENTs are characters of multibyte form or composition
         rules encoded by two-byte of ASCII codes.
  
     In addition, for backward compatibility, the following formats are
@@ -670,12 +689,20 @@ detect_coding_emacs_mule (src, src_end, multibytep)
  
  /* Record one COMPONENT (alternate character or composition rule).  */
  
-#define CODING_ADD_COMPOSITION_COMPONENT(coding, component)    \
-  (coding->cmp_data->data[coding->cmp_data->used++] = component)
+#define CODING_ADD_COMPOSITION_COMPONENT(coding, component)            \
+  do {                                                                 \
+    coding->cmp_data->data[coding->cmp_data->used++] = component;      \
+    if (coding->cmp_data->used - coding->cmp_data_start                        \
+       == COMPOSITION_DATA_MAX_BUNCH_LENGTH)                           \
+      {                                                                        \
+       CODING_ADD_COMPOSITION_END (coding, coding->produced_char);     \
+       coding->composing = COMPOSITION_NO;                             \
+      }                                                                        \
+  } while (0)
  
  
  /* Get one byte from a data pointed by SRC and increment SRC.  If SRC
-   is not less than SRC_END, return -1 without inccrementing Src.  */
+   is not less than SRC_END, return -1 without incrementing SRC.  */
  
  #define SAFE_ONE_MORE_BYTE() (src >= src_end ? -1 : *src++)
  
@@ -688,7 +715,7 @@ detect_coding_emacs_mule (src, src_end, multibytep)
  #define DECODE_EMACS_MULE_COMPOSITION_CHAR(c, p)               \
    do {                                                         \
      int bytes;                                                 \
-                                                               \
+                                                               \
      c = SAFE_ONE_MORE_BYTE ();                                 \
      if (c < 0)                                                 \
        break;                                                   \
@@ -719,7 +746,10 @@ detect_coding_emacs_mule (src, src_end, multibytep)
               break;                                            \
             *p++ = c;                                           \
           }                                                     \
-       if (UNIBYTE_STR_AS_MULTIBYTE_P (p0, p - p0, bytes))     \
+       if (UNIBYTE_STR_AS_MULTIBYTE_P (p0, p - p0, bytes)      \
+           || (coding->flags /* We are recovering a file.  */  \
+               && p0[0] == LEADING_CODE_8_BIT_CONTROL          \
+               && ! CHAR_HEAD_P (p0[1])))                      \
           c = STRING_CHAR (p0, bytes);                          \
         else                                                    \
           c = -1;                                               \
@@ -772,7 +802,7 @@ decode_composition_emacs_mule (coding, src, src_end,
    unsigned char *dst = *destination;
    int method, data_len, nchars;
    unsigned char *src_base = src++;
-  /* Store compoments of composition.  */
+  /* Store components of composition.  */
    int component[COMPOSITION_DATA_MAX_BUNCH_LENGTH];
    int ncomponent;
    /* Store multibyte form of characters to be composed.  This is for
@@ -808,6 +838,10 @@ decode_composition_emacs_mule (coding, src, src_end,
         return 0;
        for (ncomponent = 0; src < src_base + data_len; ncomponent++)
         {
+         /* If it is longer than this, it can't be valid.  */
+         if (ncomponent >= COMPOSITION_DATA_MAX_BUNCH_LENGTH)
+           return 0;
+
           if (ncomponent % 2 && with_rule)
             {
               ONE_MORE_BYTE (gref);
@@ -819,7 +853,10 @@ decode_composition_emacs_mule (coding, src, src_end,
           else
             {
               int bytes;
-             if (UNIBYTE_STR_AS_MULTIBYTE_P (src, src_end - src, bytes))
+             if (UNIBYTE_STR_AS_MULTIBYTE_P (src, src_end - src, bytes)
+                 || (coding->flags /* We are recovering a file.  */
+                     && src[0] == LEADING_CODE_8_BIT_CONTROL
+                     && ! CHAR_HEAD_P (src[1])))
                 c = STRING_CHAR (src, bytes);
               else
                 c = *src, bytes = 1;
@@ -886,7 +923,7 @@ decode_composition_emacs_mule (coding, src, src_end,
        CODING_ADD_COMPOSITION_START (coding, coding->produced_char, method);
        for (i = 0; i < ncomponent; i++)
         CODING_ADD_COMPOSITION_COMPONENT (coding, component[i]);
-      CODING_ADD_COMPOSITION_END (coding, coding->produced_char + nchars);  
+      CODING_ADD_COMPOSITION_END (coding, coding->produced_char + nchars);
        if (buf < bufp)
         {
           unsigned char *p = buf;
@@ -935,11 +972,6 @@ decode_coding_emacs_mule (coding, source, destination, src_bytes, dst_bytes)
               ONE_MORE_BYTE (c);
               if (c != '\n')
                 {
-                 if (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
-                   {
-                     coding->result = CODING_FINISH_INCONSISTENT_EOL;
-                     goto label_end_of_loop;
-                   }
                   src--;
                   c = '\r';
                 }
@@ -961,7 +993,7 @@ decode_coding_emacs_mule (coding, source, destination, src_bytes, dst_bytes)
           coding->produced_char++;
           continue;
         }
-      else if (*src == 0x80)
+      else if (*src == 0x80 && coding->cmp_data)
         {
           /* Start of composition data.  */
           int consumed  = decode_composition_emacs_mule (coding, src, src_end,
@@ -978,7 +1010,10 @@ decode_coding_emacs_mule (coding, source, destination, src_bytes, dst_bytes)
           p = tmp;
           src++;
         }
-      else if (UNIBYTE_STR_AS_MULTIBYTE_P (src, src_end - src, bytes))
+      else if (UNIBYTE_STR_AS_MULTIBYTE_P (src, src_end - src, bytes)
+              || (coding->flags /* We are recovering a file.  */
+                  && src[0] == LEADING_CODE_8_BIT_CONTROL
+                  && ! CHAR_HEAD_P (src[1])))
         {
           p = src;
           src += bytes;
@@ -1052,9 +1087,9 @@ decode_coding_emacs_mule (coding, source, destination, src_bytes, dst_bytes)
         coding->cmp_data_start = 0;                                     \
        }                                                                        \
    } while (0)
-  
  
-static void encode_eol P_ ((struct coding_system *, unsigned char *,
+
+static void encode_eol P_ ((struct coding_system *, const unsigned char *,
                             unsigned char *, int, int));
  
  static void
@@ -1109,7 +1144,22 @@ encode_coding_emacs_mule (coding, source, destination, src_bytes, dst_bytes)
             EMIT_ONE_BYTE ('\r');
         }
        else if (SINGLE_BYTE_CHAR_P (c))
-       EMIT_ONE_BYTE (c);
+       {
+         if (coding->flags && ! ASCII_BYTE_P (c))
+           {
+             /* As we are auto saving, retain the multibyte form for
+                8-bit chars.  */
+             unsigned char buf[MAX_MULTIBYTE_LENGTH];
+             int bytes = CHAR_STRING (c, buf);
+
+             if (bytes == 1)
+               EMIT_ONE_BYTE (buf[0]);
+             else
+               EMIT_TWO_BYTES (buf[0], buf[1]);
+           }
+         else
+           EMIT_ONE_BYTE (c);
+       }
        else
         EMIT_BYTES (src_base, src);
        coding->consumed_char++;
@@ -1125,21 +1175,23 @@ encode_coding_emacs_mule (coding, source, destination, src_bytes, dst_bytes)
  
  /* The following note describes the coding system ISO2022 briefly.
     Since the intention of this note is to help understand the
-   functions in this file, some parts are NOT ACCURATE or OVERLY
+   functions in this file, some parts are NOT ACCURATE or are OVERLY
     SIMPLIFIED.  For thorough understanding, please refer to the
-   original document of ISO2022.
+   original document of ISO2022.  This is equivalent to the standard
+   ECMA-35, obtainable from <URL:http://www.ecma.ch/> (*).
  
     ISO2022 provides many mechanisms to encode several character sets
-   in 7-bit and 8-bit environments.  For 7-bite environments, all text
+   in 7-bit and 8-bit environments.  For 7-bit environments, all text
     is encoded using bytes less than 128.  This may make the encoded
     text a little bit longer, but the text passes more easily through
-   several gateways, some of which strip off MSB (Most Signigant Bit).
+   several types of gateway, some of which strip off the MSB (Most
+   Significant Bit).
  
-   There are two kinds of character sets: control character set and
-   graphic character set.  The former contains control characters such
+   There are two kinds of character sets: control character sets and
+   graphic character sets.  The former contain control characters such
     as `newline' and `escape' to provide control functions (control
     functions are also provided by escape sequences).  The latter
-   contains graphic characters such as 'A' and '-'.  Emacs recognizes
+   contain graphic characters such as 'A' and '-'.  Emacs recognizes
     two control character sets and many graphic character sets.
  
     Graphic character sets are classified into one of the following
@@ -1151,14 +1203,14 @@ encode_coding_emacs_mule (coding, source, destination, src_bytes, dst_bytes)
     - DIMENSION2_CHARS96
  
     In addition, each character set is assigned an identification tag,
-   unique for each set, called "final character" (denoted as <F>
+   unique for each set, called the "final character" (denoted as <F>
     hereafter).  The <F> of each character set is decided by ECMA(*)
     when it is registered in ISO.  The code range of <F> is 0x30..0x7F
     (0x30..0x3F are for private use only).
  
     Note (*): ECMA = European Computer Manufacturers Association
  
-   Here are examples of graphic character set [NAME(<F>)]:
+   Here are examples of graphic character sets [NAME(<F>)]:
         o DIMENSION1_CHARS94 -- ASCII('B'), right-half-of-JISX0201('I'), ...
         o DIMENSION1_CHARS96 -- right-half-of-ISO8859-1('A'), ...
         o DIMENSION2_CHARS94 -- GB2312('A'), JISX0208('B'), ...
@@ -1251,11 +1303,11 @@ encode_coding_emacs_mule (coding, source, destination, src_bytes, dst_bytes)
     Note (**): If <F> is '@', 'A', or 'B', the intermediate character
     '(' can be omitted.  We refer to this as "short-form" hereafter.
  
-   Now you may notice that there are a lot of ways for encoding the
+   Now you may notice that there are a lot of ways of encoding the
     same multilingual text in ISO2022.  Actually, there exist many
     coding systems such as Compound Text (used in X11's inter client
-   communication, ISO-2022-JP (used in Japanese internet), ISO-2022-KR
-   (used in Korean internet), EUC (Extended UNIX Code, used in Asian
+   communication), ISO-2022-JP (used in Japanese Internet), ISO-2022-KR
+   (used in Korean Internet), EUC (Extended UNIX Code, used in Asian
     localized platforms), and all of these are variants of ISO2022.
  
     In addition to the above, Emacs handles two more kinds of escape
@@ -1277,19 +1329,19 @@ encode_coding_emacs_mule (coding, source, destination, src_bytes, dst_bytes)
         o ESC '3' -- start relative composition with alternate chars  (**)
         o ESC '4' -- start rule-base composition with alternate chars  (**)
    Since these are not standard escape sequences of any ISO standard,
-  the use of them for these meaning is restricted to Emacs only.
+  the use of them with these meanings is restricted to Emacs only.
  
-  (*) This form is used only in Emacs 20.5 and the older versions,
+  (*) This form is used only in Emacs 20.5 and older versions,
    but the newer versions can safely decode it.
-  (**) This form is used only in Emacs 21.1 and the newer versions,
+  (**) This form is used only in Emacs 21.1 and newer versions,
    and the older versions can't decode it.
  
-  Here's a list of examples usages of these composition escape
+  Here's a list of example usages of these composition escape
    sequences (categorized by `enum composition_method').
  
    COMPOSITION_RELATIVE:
         ESC 0 CHAR [ CHAR ] ESC 1
-  COMPOSITOIN_WITH_RULE:
+  COMPOSITION_WITH_RULE:
         ESC 2 CHAR [ RULE CHAR ] ESC 1
    COMPOSITION_WITH_ALTCHARS:
         ESC 3 ALTCHAR [ ALTCHAR ] ESC 0 CHAR [ CHAR ] ESC 1
@@ -1301,7 +1353,7 @@ enum iso_code_class_type iso_code_class[256];
  #define CHARSET_OK(idx, charset, c)                                    \
    (coding_system_table[idx]                                            \
     && (charset == CHARSET_ASCII                                                \
-       || (safe_chars = coding_safe_chars (coding_system_table[idx]),  \
+       || (safe_chars = coding_safe_chars (coding_system_table[idx]->symbol), \
            CODING_SAFE_CHAR_P (safe_chars, c)))                         \
     && (CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding_system_table[idx],        \
                                               charset)                  \
@@ -1310,8 +1362,11 @@ enum iso_code_class_type iso_code_class[256];
  #define SHIFT_OUT_OK(idx) \
    (CODING_SPEC_ISO_INITIAL_DESIGNATION (coding_system_table[idx], 1) >= 0)
  
+#define COMPOSITION_OK(idx)    \
+  (coding_system_table[idx]->composing != COMPOSITION_DISABLED)
+
  /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
-   Check if a text is encoded in ISO2022.  If it is, returns an
+   Check if a text is encoded in ISO2022.  If it is, return an
     integer in which appropriate flag bits any of:
         CODING_CATEGORY_MASK_ISO_7
         CODING_CATEGORY_MASK_ISO_7_TIGHT
@@ -1330,7 +1385,7 @@ detect_coding_iso2022 (src, src_end, multibytep)
    int mask = CODING_CATEGORY_MASK_ISO;
    int mask_found = 0;
    int reg[4], shift_out = 0, single_shifting = 0;
-  int c, c1, i, charset;
+  int c, c1, charset;
    /* Dummy for ONE_MORE_BYTE.  */
    struct coding_system dummy_coding;
    struct coding_system *coding = &dummy_coding;
@@ -1340,6 +1395,7 @@ detect_coding_iso2022 (src, src_end, multibytep)
    while (mask && src < src_end)
      {
        ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep);
+    retry:
        switch (c)
         {
         case ISO_CODE_ESC:
@@ -1386,7 +1442,30 @@ detect_coding_iso2022 (src, src_end, multibytep)
           else if (c >= '0' && c <= '4')
             {
               /* ESC <Fp> for start/end composition.  */
-             mask_found |= CODING_CATEGORY_MASK_ISO;
+             if (COMPOSITION_OK (CODING_CATEGORY_IDX_ISO_7))
+               mask_found |= CODING_CATEGORY_MASK_ISO_7;
+             else
+               mask &= ~CODING_CATEGORY_MASK_ISO_7;
+             if (COMPOSITION_OK (CODING_CATEGORY_IDX_ISO_7_TIGHT))
+               mask_found |= CODING_CATEGORY_MASK_ISO_7_TIGHT;
+             else
+               mask &= ~CODING_CATEGORY_MASK_ISO_7_TIGHT;
+             if (COMPOSITION_OK (CODING_CATEGORY_IDX_ISO_8_1))
+               mask_found |= CODING_CATEGORY_MASK_ISO_8_1;
+             else
+               mask &= ~CODING_CATEGORY_MASK_ISO_8_1;
+             if (COMPOSITION_OK (CODING_CATEGORY_IDX_ISO_8_2))
+               mask_found |= CODING_CATEGORY_MASK_ISO_8_2;
+             else
+               mask &= ~CODING_CATEGORY_MASK_ISO_8_2;
+             if (COMPOSITION_OK (CODING_CATEGORY_IDX_ISO_7_ELSE))
+               mask_found |= CODING_CATEGORY_MASK_ISO_7_ELSE;
+             else
+               mask &= ~CODING_CATEGORY_MASK_ISO_7_ELSE;
+             if (COMPOSITION_OK (CODING_CATEGORY_IDX_ISO_8_ELSE))
+               mask_found |= CODING_CATEGORY_MASK_ISO_8_ELSE;
+             else
+               mask &= ~CODING_CATEGORY_MASK_ISO_8_ELSE;
               break;
             }
           else
@@ -1428,7 +1507,7 @@ detect_coding_iso2022 (src, src_end, multibytep)
               mask_found |= CODING_CATEGORY_MASK_ISO_SHIFT;
             }
           break;
-         
+
         case ISO_CODE_SI:
           if (inhibit_iso_escape_detection)
             break;
@@ -1514,6 +1593,8 @@ detect_coding_iso2022 (src, src_end, multibytep)
                   && mask & CODING_CATEGORY_MASK_ISO_8_2)
                 {
                   int i = 1;
+
+                 c = -1;
                   while (src < src_end)
                     {
                       ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep);
@@ -1526,6 +1607,9 @@ detect_coding_iso2022 (src, src_end, multibytep)
                     mask &= ~CODING_CATEGORY_MASK_ISO_8_2;
                   else
                     mask_found |= CODING_CATEGORY_MASK_ISO_8_2;
+                 if (c >= 0)
+                   /* This means that we have read one extra byte.  */
+                   goto retry;
                 }
             }
           break;
@@ -1624,7 +1708,7 @@ coding_allocate_composition_data (coding, char_offset)
             that coding->cmp_data has enough space to store the            \
             information about the composition.  If not, terminate the      \
             current decoding loop, allocate one more memory block for      \
-           coding->cmp_data in the calller, then start the decoding       \
+           coding->cmp_data in the caller, then start the decoding        \
             loop again.  We can't allocate memory here directly because    \
             it may cause buffer/string relocation.  */                     \
         if (!coding->cmp_data                                              \
@@ -1656,11 +1740,11 @@ coding_allocate_composition_data (coding, char_offset)
        }                                                                           \
    } while (0)
  
-/* Handle compositoin end sequence ESC 1.  */
+/* Handle composition end sequence ESC 1.  */
  
  #define DECODE_COMPOSITION_END(c1)                                     \
    do {                                                                 \
-    if (coding->composing == COMPOSITION_DISABLED)                     \
+    if (! COMPOSING_P (coding))                                                \
        {                                                                        \
         *dst++ = ISO_CODE_ESC;                                          \
         *dst++ = c1;                                                    \
@@ -1724,7 +1808,7 @@ decode_coding_iso2022 (coding, source, destination, src_bytes, dst_bytes)
    Lisp_Object translation_table;
    Lisp_Object safe_chars;
  
-  safe_chars = coding_safe_chars (coding);
+  safe_chars = coding_safe_chars (coding->symbol);
  
    if (NILP (Venable_character_translation))
      translation_table = Qnil;
@@ -1739,7 +1823,7 @@ decode_coding_iso2022 (coding, source, destination, src_bytes, dst_bytes)
  
    while (1)
      {
-      int c1, c2;
+      int c1, c2 = 0;
  
        src_base = src;
        ONE_MORE_BYTE (c1);
@@ -1815,11 +1899,6 @@ decode_coding_iso2022 (coding, source, destination, src_bytes, dst_bytes)
               ONE_MORE_BYTE (c1);
               if (c1 != ISO_CODE_LF)
                 {
-                 if (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
-                   {
-                     coding->result = CODING_FINISH_INCONSISTENT_EOL;
-                     goto label_end_of_loop;
-                   }
                   src--;
                   c1 = '\r';
                 }
@@ -1955,7 +2034,7 @@ decode_coding_iso2022 (coding, source, destination, src_bytes, dst_bytes)
                 goto label_invalid_code;
               /* For the moment, nested direction is not supported.
                  So, `coding->mode & CODING_MODE_DIRECTION' zero means
-                left-to-right, and nozero means right-to-left.  */
+                left-to-right, and nonzero means right-to-left.  */
               ONE_MORE_BYTE (c1);
               switch (c1)
                 {
@@ -1984,6 +2063,78 @@ decode_coding_iso2022 (coding, source, destination, src_bytes, dst_bytes)
                 }
               continue;
  
+           case '%':
+             if (COMPOSING_P (coding))
+               DECODE_COMPOSITION_END ('1');
+             ONE_MORE_BYTE (c1);
+             if (c1 == '/')
+               {
+                 /* CTEXT extended segment:
+                    ESC % / [0-4] M L --ENCODING-NAME-- \002 --BYTES--
+                    We keep these bytes as is for the moment.
+                    They may be decoded by post-read-conversion.  */
+                 int dim, M, L;
+                 int size, required;
+                 int produced_chars;
+                 
+                 ONE_MORE_BYTE (dim);
+                 ONE_MORE_BYTE (M);
+                 ONE_MORE_BYTE (L);
+                 size = ((M - 128) * 128) + (L - 128);
+                 required = 8 + size * 2;
+                 if (dst + required > (dst_bytes ? dst_end : src))
+                   goto label_end_of_loop;
+                 *dst++ = ISO_CODE_ESC;
+                 *dst++ = '%';
+                 *dst++ = '/';
+                 *dst++ = dim;
+                 produced_chars = 4;
+                 dst += CHAR_STRING (M, dst), produced_chars++;
+                 dst += CHAR_STRING (L, dst), produced_chars++;
+                 while (size-- > 0)
+                   {
+                     ONE_MORE_BYTE (c1);
+                     dst += CHAR_STRING (c1, dst), produced_chars++;
+                   }
+                 coding->produced_char += produced_chars;
+               }
+             else if (c1 == 'G')
+               {
+                 unsigned char *d = dst;
+                 int produced_chars;
+
+                 /* XFree86 extension for embedding UTF-8 in CTEXT:
+                    ESC % G --UTF-8-BYTES-- ESC % @
+                    We keep these bytes as is for the moment.
+                    They may be decoded by post-read-conversion.  */
+                 if (d + 6 > (dst_bytes ? dst_end : src))
+                   goto label_end_of_loop;
+                 *d++ = ISO_CODE_ESC;
+                 *d++ = '%';
+                 *d++ = 'G';
+                 produced_chars = 3;
+                 while (d + 1 < (dst_bytes ? dst_end : src))
+                   {
+                     ONE_MORE_BYTE (c1);
+                     if (c1 == ISO_CODE_ESC
+                         && src + 1 < src_end
+                         && src[0] == '%'
+                         && src[1] == '@')
+                       break;
+                     d += CHAR_STRING (c1, d), produced_chars++;
+                   }
+                 if (d + 3 > (dst_bytes ? dst_end : src))
+                   goto label_end_of_loop;
+                 *d++ = ISO_CODE_ESC;
+                 *d++ = '%';
+                 *d++ = '@';
+                 dst = d;
+                 coding->produced_char += produced_chars + 3;
+               }
+             else
+               goto label_invalid_code;
+             continue;
+
             default:
               if (! (coding->flags & CODING_FLAG_ISO_DESIGNATION))
                 goto label_invalid_code;
@@ -2040,9 +2191,9 @@ decode_coding_iso2022 (coding, source, destination, src_bytes, dst_bytes)
  
  /*
     It is not enough to say just "ISO2022" on encoding, we have to
-   specify more details.  In Emacs, each coding system of ISO2022
+   specify more details.  In Emacs, each ISO2022 coding system
     variant has the following specifications:
-       1. Initial designation to G0 thru G3.
+       1. Initial designation to G0 through G3.
         2. Allows short-form designation?
         3. ASCII should be designated to G0 before control characters?
         4. ASCII should be designated to G0 at end of line?
@@ -2249,11 +2400,11 @@ decode_coding_iso2022 (coding, source, destination, src_bytes, dst_bytes)
  
  /* Instead of encoding character C, produce one or two `?'s.  */
  
-#define ENCODE_UNSAFE_CHARACTER(c)                                     \
-  do {                                                                 \
-    ENCODE_ISO_CHARACTER (CODING_INHIBIT_CHARACTER_SUBSTITUTION);      \
-    if (CHARSET_WIDTH (CHAR_CHARSET (c)) > 1)                          \
-      ENCODE_ISO_CHARACTER (CODING_INHIBIT_CHARACTER_SUBSTITUTION);    \
+#define ENCODE_UNSAFE_CHARACTER(c)                             \
+  do {                                                         \
+    ENCODE_ISO_CHARACTER (CODING_REPLACEMENT_CHARACTER);       \
+    if (CHARSET_WIDTH (CHAR_CHARSET (c)) > 1)                  \
+      ENCODE_ISO_CHARACTER (CODING_REPLACEMENT_CHARACTER);     \
    } while (0)
  
  
@@ -2434,7 +2585,7 @@ encode_designation_at_bol (coding, translation_table, src, src_end, dst)
        ONE_MORE_CHAR (c);
        if (c == '\n')
         break;
-      
+
        charset = CHAR_CHARSET (c);
        reg = CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset);
        if (reg != CODING_SPEC_ISO_NO_REQUESTED_DESIGNATION && r[reg] < 0)
@@ -2482,7 +2633,10 @@ encode_coding_iso2022 (coding, source, destination, src_bytes, dst_bytes)
    Lisp_Object translation_table;
    Lisp_Object safe_chars;
  
-  safe_chars = coding_safe_chars (coding);
+  if (coding->flags & CODING_FLAG_ISO_SAFE)
+    coding->mode |= CODING_MODE_INHIBIT_UNENCODABLE_CHAR;
+
+  safe_chars = coding_safe_chars (coding->symbol);
  
    if (NILP (Venable_character_translation))
      translation_table = Qnil;
@@ -2536,7 +2690,7 @@ encode_coding_iso2022 (coding, source, destination, src_bytes, dst_bytes)
               /* COMPOSITION_WITH_ALTCHARS or COMPOSITION_WITH_RULE_ALTCHAR  */
               if (coding->cmp_data_index == coding->cmp_data_start + data[0])
                 /* We have consumed components of the composition.
-                   What follows in SRC is the compositions's base
+                   What follows in SRC is the composition's base
                     text.  */
                 ENCODE_COMPOSITION_FAKE_START (coding);
               else
@@ -2549,7 +2703,7 @@ encode_coding_iso2022 (coding, source, destination, src_bytes, dst_bytes)
                     }
                   else
                     {
-                     if (coding->flags & CODING_FLAG_ISO_SAFE
+                     if (coding->mode & CODING_MODE_INHIBIT_UNENCODABLE_CHAR
                           && ! CODING_SAFE_CHAR_P (safe_chars, c))
                         ENCODE_UNSAFE_CHARACTER (c);
                       else
@@ -2604,7 +2758,7 @@ encode_coding_iso2022 (coding, source, destination, src_bytes, dst_bytes)
                 *dst++ = ISO_CODE_CR;
               CODING_SPEC_ISO_BOL (coding) = 1;
             }
-         else 
+         else
             {
               if (coding->flags & CODING_FLAG_ISO_RESET_AT_CNTL)
                 ENCODE_RESET_PLANE_AND_REGISTER;
@@ -2618,7 +2772,7 @@ encode_coding_iso2022 (coding, source, destination, src_bytes, dst_bytes)
           *dst++ = c;
           coding->errors++;
         }
-      else if (coding->flags & CODING_FLAG_ISO_SAFE
+      else if (coding->mode & CODING_MODE_INHIBIT_UNENCODABLE_CHAR
                && ! CODING_SAFE_CHAR_P (safe_chars, c))
         ENCODE_UNSAFE_CHARACTER (c);
        else
@@ -2635,7 +2789,7 @@ encode_coding_iso2022 (coding, source, destination, src_bytes, dst_bytes)
  \f
  /*** 4. SJIS and BIG5 handlers ***/
  
-/* Although SJIS and BIG5 are not ISO's coding system, they are used
+/* Although SJIS and BIG5 are not ISO coding systems, they are used
     quite widely.  So, for the moment, Emacs supports them in the bare
     C code.  But, in the future, they may be supported only by CCL.  */
  
@@ -2644,12 +2798,12 @@ encode_coding_iso2022 (coding, source, destination, src_bytes, dst_bytes)
     as is.  A character of charset katakana-jisx0201 is encoded by
     "position-code + 0x80".  A character of charset japanese-jisx0208
     is encoded in 2-byte but two position-codes are divided and shifted
-   so that it fit in the range below.
+   so that it fits in the range below.
  
     --- CODE RANGE of SJIS ---
     (character set)     (range)
     ASCII               0x00 .. 0x7F
-   KATAKANA-JISX0201   0xA0 .. 0xDF
+   KATAKANA-JISX0201   0xA1 .. 0xDF
     JISX0208 (1st byte) 0x81 .. 0x9F and 0xE0 .. 0xEF
             (2nd byte)  0x40 .. 0x7E and 0x80 .. 0xFC
     -------------------------------
@@ -2658,7 +2812,7 @@ encode_coding_iso2022 (coding, source, destination, src_bytes, dst_bytes)
  
  /* BIG5 is a coding system encoding two character sets: ASCII and
     Big5.  An ASCII character is encoded as is.  Big5 is a two-byte
-   character set and is encoded in two-byte.
+   character set and is encoded in two bytes.
  
     --- CODE RANGE of BIG5 ---
     (character set)     (range)
@@ -2676,7 +2830,7 @@ encode_coding_iso2022 (coding, source, destination, src_bytes, dst_bytes)
  
  /* Macros to decode or encode a character of Big5 in BIG5.  B1 and B2
     are the 1st and 2nd position-codes of Big5 in BIG5 coding system.
-   C1 and C2 are the 1st and 2nd position-codes of of Emacs' internal
+   C1 and C2 are the 1st and 2nd position-codes of Emacs' internal
     format.  CHARSET is `charset_big5_1' or `charset_big5_2'.  */
  
  /* Number of Big5 characters which have the same code in 1st byte.  */
@@ -2724,15 +2878,14 @@ detect_coding_sjis (src, src_end, multibytep)
    while (1)
      {
        ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep);
-      if (c >= 0x81)
+      if (c < 0x80)
+       continue;
+      if (c == 0x80 || c == 0xA0 || c > 0xEF)
+       return 0;
+      if (c <= 0x9F || c >= 0xE0)
         {
-         if (c <= 0x9F || (c >= 0xE0 && c <= 0xEF))
-           {
-             ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep);
-             if (c < 0x40 || c == 0x7F || c > 0xFC)
-               return 0;
-           }
-         else if (c > 0xDF)
+         ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep);
+         if (c < 0x40 || c == 0x7F || c > 0xFC)
             return 0;
         }
      }
@@ -2757,12 +2910,13 @@ detect_coding_big5 (src, src_end, multibytep)
    while (1)
      {
        ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep);
-      if (c >= 0xA1)
-       {
-         ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep);
-         if (c < 0x40 || (c >= 0x7F && c <= 0xA0))
-           return 0;
-       }
+      if (c < 0x80)
+       continue;
+      if (c < 0xA1 || c > 0xFE)
+       return 0;
+      ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep);
+      if (c < 0x40 || (c > 0x7F && c < 0xA1) || c > 0xFE)
+       return 0;
      }
   label_end_of_loop:
    return CODING_CATEGORY_MASK_BIG5;
@@ -2845,7 +2999,7 @@ detect_coding_utf_16 (src, src_end, multibytep)
       int multibytep;
  {
    unsigned char c1, c2;
-  /* Dummy for TWO_MORE_BYTES.  */
+  /* Dummy for ONE_MORE_BYTE_CHECK_MULTIBYTE.  */
    struct coding_system dummy_coding;
    struct coding_system *coding = &dummy_coding;
  
@@ -2896,7 +3050,7 @@ decode_coding_sjis_big5 (coding, source, destination,
    coding->produced_char = 0;
    while (1)
      {
-      int c, charset, c1, c2;
+      int c, charset, c1, c2 = 0;
  
        src_base = src;
        ONE_MORE_BYTE (c1);
@@ -2913,12 +3067,6 @@ decode_coding_sjis_big5 (coding, source, destination,
                       ONE_MORE_BYTE (c2);
                       if (c2 == '\n')
                         c1 = c2;
-                     else if (coding->mode
-                              & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
-                       {
-                         coding->result = CODING_FINISH_INCONSISTENT_EOL;
-                         goto label_end_of_loop;
-                       }
                       else
                         /* To process C2 again, SRC is subtracted by 1.  */
                         src--;
@@ -2940,9 +3088,9 @@ decode_coding_sjis_big5 (coding, source, destination,
          {
           if (sjis_p)
             {
-             if (c1 >= 0xF0)
+             if (c1 == 0x80 || c1 == 0xA0 || c1 > 0xEF)
                 goto label_invalid_code;
-             if (c1 < 0xA0 || c1 >= 0xE0)
+             if (c1 <= 0x9F || c1 >= 0xE0)
                 {
                   /* SJIS -> JISX0208 */
                   ONE_MORE_BYTE (c2);
@@ -2958,7 +3106,7 @@ decode_coding_sjis_big5 (coding, source, destination,
           else
             {
               /* BIG5 -> Big5 */
-             if (c1 < 0xA1 || c1 > 0xFE)
+             if (c1 < 0xA0 || c1 > 0xFE)
                 goto label_invalid_code;
               ONE_MORE_BYTE (c2);
               if (c2 < 0x40 || (c2 > 0x7E && c2 < 0xA1) || c2 > 0xFE)
@@ -3027,14 +3175,14 @@ encode_coding_sjis_big5 (coding, source, destination,
  
        src_base = src;
        ONE_MORE_CHAR (c);
-      
+
        /* Now encode the character C.  */
        if (SINGLE_BYTE_CHAR_P (c))
         {
           switch (c)
             {
             case '\r':
-             if (!coding->mode & CODING_MODE_SELECTIVE_DISPLAY)
+             if (!(coding->mode & CODING_MODE_SELECTIVE_DISPLAY))
                 {
                   EMIT_ONE_BYTE (c);
                   break;
@@ -3067,6 +3215,12 @@ encode_coding_sjis_big5 (coding, source, destination,
                 EMIT_ONE_BYTE (c1 | 0x80);
               else if (charset == charset_latin_jisx0201)
                 EMIT_ONE_BYTE (c1);
+             else if (coding->mode & CODING_MODE_INHIBIT_UNENCODABLE_CHAR)
+               {
+                 EMIT_ONE_BYTE (CODING_REPLACEMENT_CHARACTER);
+                 if (CHARSET_WIDTH (charset) > 1)
+                   EMIT_ONE_BYTE (CODING_REPLACEMENT_CHARACTER);
+               }
               else
                 /* There's no way other than producing the internal
                    codes as is.  */
@@ -3079,6 +3233,12 @@ encode_coding_sjis_big5 (coding, source, destination,
                   ENCODE_BIG5 (charset, c1, c2, c1, c2);
                   EMIT_TWO_BYTES (c1, c2);
                 }
+             else if (coding->mode & CODING_MODE_INHIBIT_UNENCODABLE_CHAR)
+               {
+                 EMIT_ONE_BYTE (CODING_REPLACEMENT_CHARACTER);
+                 if (CHARSET_WIDTH (charset) > 1)
+                   EMIT_ONE_BYTE (CODING_REPLACEMENT_CHARACTER);
+               }
               else
                 /* There's no way other than producing the internal
                    codes as is.  */
@@ -3164,11 +3324,6 @@ decode_eol (coding, source, destination, src_bytes, dst_bytes)
               ONE_MORE_BYTE (c);
               if (c != '\n')
                 {
-                 if (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
-                   {
-                     coding->result = CODING_FINISH_INCONSISTENT_EOL;
-                     goto label_end_of_loop;
-                   }
                   src--;
                   c = '\r';
                 }
@@ -3219,7 +3374,7 @@ decode_eol (coding, source, destination, src_bytes, dst_bytes)
  
  /* See "GENERAL NOTES about `encode_coding_XXX ()' functions".  Encode
     format of end-of-line according to `coding->eol_type'.  It also
-   convert multibyte form 8-bit characers to unibyte if
+   converts multibyte form 8-bit characters to unibyte if
     CODING->src_multibyte is nonzero.  If `coding->mode &
     CODING_MODE_SELECTIVE_DISPLAY' is nonzero, code '\r' in source text
     also means end-of-line.  */
@@ -3227,12 +3382,13 @@ decode_eol (coding, source, destination, src_bytes, dst_bytes)
  static void
  encode_eol (coding, source, destination, src_bytes, dst_bytes)
       struct coding_system *coding;
-     unsigned char *source, *destination;
+     const unsigned char *source;
+     unsigned char *destination;
       int src_bytes, dst_bytes;
  {
-  unsigned char *src = source;
+  const unsigned char *src = source;
    unsigned char *dst = destination;
-  unsigned char *src_end = src + src_bytes;
+  const unsigned char *src_end = src + src_bytes;
    unsigned char *dst_end = dst + dst_bytes;
    Lisp_Object translation_table;
    /* SRC_BASE remembers the start position in source in each loop.
@@ -3240,7 +3396,8 @@ encode_eol (coding, source, destination, src_bytes, dst_bytes)
       analyze multi-byte codes (within macro ONE_MORE_CHAR), or when
       there's not enough destination area to produce encoded codes
       (within macro EMIT_BYTES).  */
-  unsigned char *src_base;
+  const unsigned char *src_base;
+  unsigned char *tmp;
    int c;
    int selective_display = coding->mode & CODING_MODE_SELECTIVE_DISPLAY;
  
@@ -3290,13 +3447,13 @@ encode_eol (coding, source, destination, src_bytes, dst_bytes)
         }
        if (coding->eol_type == CODING_EOL_CR)
         {
-         for (src = destination; src < dst; src++)
-           if (*src == '\n') *src = '\r';
+         for (tmp = destination; tmp < dst; tmp++)
+           if (*tmp == '\n') *tmp = '\r';
         }
        else if (selective_display)
         {
-         for (src = destination; src < dst; src++)
-           if (*src == '\r') *src = '\n';
+         for (tmp = destination; tmp < dst; tmp++)
+           if (*tmp == '\r') *tmp = '\n';
         }
      }
    if (coding->src_multibyte)
@@ -3310,15 +3467,15 @@ encode_eol (coding, source, destination, src_bytes, dst_bytes)
  \f
  /*** 7. C library functions ***/
  
-/* In Emacs Lisp, coding system is represented by a Lisp symbol which
+/* In Emacs Lisp, a coding system is represented by a Lisp symbol which
     has a property `coding-system'.  The value of this property is a
-   vector of length 5 (called as coding-vector).  Among elements of
+   vector of length 5 (called the coding-vector).  Among elements of
     this vector, the first (element[0]) and the fifth (element[4])
     carry important information for decoding/encoding.  Before
     decoding/encoding, this information should be set in fields of a
     structure of type `coding_system'.
  
-   A value of property `coding-system' can be a symbol of another
+   The value of the property `coding-system' can be a symbol of another
     subsidiary coding-system.  In that case, Emacs gets coding-vector
     from that symbol.
  
@@ -3352,7 +3509,7 @@ encode_eol (coding, source, destination, src_bytes, dst_bytes)
  
         If the value is nil, graphic register N is never used on
         encoding.
-   
+
     sub-element[N] where N is 4 through 11: to be set in `coding->flags'
         Each value takes t or nil.  See the section ISO2022 of
         `coding.h' for more information.
@@ -3362,12 +3519,12 @@ encode_eol (coding, source, destination, src_bytes, dst_bytes)
  
     If `coding->type' takes the other value, element[4] is ignored.
  
-   Emacs Lisp's coding system also carries information about format of
+   Emacs Lisp's coding systems also carry information about format of
     end-of-line in a value of property `eol-type'.  If the value is
     integer, 0 means CODING_EOL_LF, 1 means CODING_EOL_CRLF, and 2
     means CODING_EOL_CR.  If it is not integer, it should be a vector
     of subsidiary coding systems of which property `eol-type' has one
-   of above values.
+   of the above values.
  
  */
  
@@ -3383,7 +3540,6 @@ setup_coding_system (coding_system, coding)
  {
    Lisp_Object coding_spec, coding_type, eol_type, plist;
    Lisp_Object val;
-  int i;
  
    /* At first, zero clear all members.  */
    bzero (coding, sizeof (struct coding_system));
@@ -3451,7 +3607,7 @@ setup_coding_system (coding_system, coding)
       `translation-table-for-decode', `translation-table-for-encode'.  */
    plist = XVECTOR (coding_spec)->contents[3];
    /* Pre & post conversion functions should be disabled if
-     inhibit_eol_conversion is nozero.  This is the case that a code
+     inhibit_pre_post_conversion is nonzero.  This is the case that a code
       conversion function is called while those functions are running.  */
    if (! inhibit_pre_post_conversion)
      {
@@ -3477,7 +3633,7 @@ setup_coding_system (coding_system, coding)
      }
    else
      goto label_invalid_coding_system;
-  
+
    /* If the coding system has non-nil `composition' property, enable
       composition handling.  */
    val = Fplist_get (plist, Qcomposition);
@@ -3490,7 +3646,6 @@ setup_coding_system (coding_system, coding)
        coding->type = coding_type_emacs_mule;
        coding->common_flags
         |= CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK;
-      coding->composing = COMPOSITION_NO;
        if (!NILP (coding->post_read_conversion))
         coding->common_flags |= CODING_REQUIRE_DECODING_MASK;
        if (!NILP (coding->pre_write_conversion))
@@ -3572,8 +3727,8 @@ setup_coding_system (coding_system, coding)
             = CODING_SPEC_ISO_NO_REQUESTED_DESIGNATION;
         for (i = 0; i < 4; i++)
           {
-           if (INTEGERP (flags[i])
-               && (charset = XINT (flags[i]), CHARSET_VALID_P (charset))
+           if ((INTEGERP (flags[i])
+                && (charset = XINT (flags[i]), CHARSET_VALID_P (charset)))
                 || (charset = get_charset_id (flags[i])) >= 0)
               {
                 CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, i) = charset;
@@ -3591,9 +3746,9 @@ setup_coding_system (coding_system, coding)
                 tail = flags[i];
  
                 coding->flags |= CODING_FLAG_ISO_DESIGNATION;
-               if (INTEGERP (XCAR (tail))
-                   && (charset = XINT (XCAR (tail)),
-                       CHARSET_VALID_P (charset))
+               if ((INTEGERP (XCAR (tail))
+                    && (charset = XINT (XCAR (tail)),
+                        CHARSET_VALID_P (charset)))
                     || (charset = get_charset_id (XCAR (tail))) >= 0)
                   {
                     CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, i) = charset;
@@ -3604,9 +3759,9 @@ setup_coding_system (coding_system, coding)
                 tail = XCDR (tail);
                 while (CONSP (tail))
                   {
-                   if (INTEGERP (XCAR (tail))
-                       && (charset = XINT (XCAR (tail)),
-                           CHARSET_VALID_P (charset))
+                   if ((INTEGERP (XCAR (tail))
+                        && (charset = XINT (XCAR (tail)),
+                            CHARSET_VALID_P (charset)))
                         || (charset = get_charset_id (XCAR (tail))) >= 0)
                       CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset)
                         = i;
@@ -3617,7 +3772,7 @@ setup_coding_system (coding_system, coding)
               }
             else
               CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, i) = -1;
-           
+
             CODING_SPEC_ISO_DESIGNATION (coding, i)
               = CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, i);
           }
@@ -3635,7 +3790,7 @@ setup_coding_system (coding_system, coding)
         if (reg_bits)
           for (charset = 0; charset <= MAX_CHARSET; charset++)
             {
-             if (CHARSET_VALID_P (charset)
+             if (CHARSET_DEFINED_P (charset)
                   && (CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset)
                       == CODING_SPEC_ISO_NO_REQUESTED_DESIGNATION))
                 {
@@ -3846,14 +4001,14 @@ setup_raw_text_coding_system (coding)
     o coding-category-iso-7-else
  
         The category for a coding system which has the same code range
-       as ISO2022 of 7-bit environemnt but uses locking shift or
+       as ISO2022 of 7-bit environment but uses locking shift or
         single shift functions.  Assigned the coding-system (Lisp
         symbol) `iso-2022-7bit-lock' by default.
  
     o coding-category-iso-8-else
  
         The category for a coding system which has the same code range
-       as ISO2022 of 8-bit environemnt but uses locking shift or
+       as ISO2022 of 8-bit environment but uses locking shift or
         single shift functions.  Assigned the coding-system (Lisp
         symbol) `iso-2022-8bit-ss2' by default.
  
@@ -3896,10 +4051,10 @@ setup_raw_text_coding_system (coding)
         `no-conversion' by default.
  
     Each of them is a Lisp symbol and the value is an actual
-   `coding-system's (this is also a Lisp symbol) assigned by a user.
+   `coding-system' (this is also a Lisp symbol) assigned by a user.
     What Emacs does actually is to detect a category of coding system.
     Then, it uses a `coding-system' assigned to it.  If Emacs can't
-   decide only one possible category, it selects a category of the
+   decide a single possible category, it selects a category of the
     highest priority.  Priorities of categories are also specified by a
     user in a Lisp variable `coding-category-list'.
  
@@ -3928,7 +4083,7 @@ detect_coding_mask (source, src_bytes, priorities, skip, multibytep)
    register unsigned char c;
    unsigned char *src = source, *src_end = source + src_bytes;
    unsigned int mask, utf16_examined_p, iso2022_examined_p;
-  int i, idx;
+  int i;
  
    /* At first, skip all ASCII characters and control characters except
       for three ISO2022 specific control characters.  */
@@ -3977,7 +4132,7 @@ detect_coding_mask (source, src_bytes, priorities, skip, multibytep)
        int try;
  
        if (multibytep && c == LEADING_CODE_8_BIT_CONTROL)
-       c = *src++ - 0x20;
+       c = src[1] - 0x20;
  
        if (c < 0xA0)
         {
@@ -3990,7 +4145,7 @@ detect_coding_mask (source, src_bytes, priorities, skip, multibytep)
                   | CODING_CATEGORY_MASK_UTF_16_LE);
  
           /* Or, if C is a special latin extra code,
-            or is an ISO2022 specific control code of C1 (SS2 or SS3), 
+            or is an ISO2022 specific control code of C1 (SS2 or SS3),
              or is an ISO2022 control-sequence-introducer (CSI),
              we should also consider the possibility of ISO2022 codings.  */
           if ((VECTORP (Vlatin_extra_code_table)
@@ -4033,7 +4188,7 @@ detect_coding_mask (source, src_bytes, priorities, skip, multibytep)
               if (!iso2022_examined_p
                   && (priorities[i] & try & CODING_CATEGORY_MASK_ISO))
                 {
-                 mask |= detect_coding_iso2022 (src, src_end);
+                 mask |= detect_coding_iso2022 (src, src_end, multibytep);
                   iso2022_examined_p = 1;
                 }
               else if (priorities[i] & try & CODING_CATEGORY_MASK_SJIS)
@@ -4086,11 +4241,11 @@ detect_coding_mask (source, src_bytes, priorities, skip, multibytep)
  void
  detect_coding (coding, src, src_bytes)
       struct coding_system *coding;
-     unsigned char *src;
+     const unsigned char *src;
       int src_bytes;
  {
    unsigned int idx;
-  int skip, mask, i;
+  int skip, mask;
    Lisp_Object val;
  
    val = Vcoding_category_list;
@@ -4106,7 +4261,7 @@ detect_coding (coding, src, src_bytes)
    if (! mask)
      idx = CODING_CATEGORY_IDX_RAW_TEXT;
  
-  val = XSYMBOL (XVECTOR (Vcoding_category_table)->contents[idx])->value;
+  val = SYMBOL_VALUE (XVECTOR (Vcoding_category_table)->contents[idx]);
  
    if (coding->eol_type != CODING_EOL_UNDECIDED)
      {
@@ -4189,7 +4344,7 @@ detect_eol_type (source, src_bytes, skip)
  static int
  detect_eol_type_in_2_octet_form (source, src_bytes, skip, big_endian_p)
       unsigned char *source;
-     int src_bytes, *skip;
+     int src_bytes, *skip, big_endian_p;
  {
    unsigned char *src = source, *src_end = src + src_bytes;
    unsigned int c1, c2;
@@ -4259,7 +4414,7 @@ detect_eol_type_in_2_octet_form (source, src_bytes, skip, big_endian_p)
  void
  detect_eol (coding, src, src_bytes)
       struct coding_system *coding;
-     unsigned char *src;
+     const unsigned char *src;
       int src_bytes;
  {
    Lisp_Object val;
@@ -4309,11 +4464,13 @@ detect_eol (coding, src, src_bytes)
      {
        int src_multibyte = coding->src_multibyte;
        int dst_multibyte = coding->dst_multibyte;
+      struct composition_data *cmp_data = coding->cmp_data;
  
        setup_coding_system (XVECTOR (val)->contents[eol_type], coding);
        coding->src_multibyte = src_multibyte;
        coding->dst_multibyte = dst_multibyte;
        coding->heading_ascii = skip;
+      coding->cmp_data = cmp_data;
      }
  }
  
@@ -4349,7 +4506,11 @@ encoding_buffer_size (coding, src_bytes)
    int magnification;
  
    if (coding->type == coding_type_ccl)
-    magnification = coding->spec.ccl.encoder.buf_magnification;
+    {
+      magnification = coding->spec.ccl.encoder.buf_magnification;
+      if (coding->eol_type == CODING_EOL_CRLF)
+       magnification *= 2;
+    }
    else if (CODING_REQUIRE_ENCODING (coding))
      magnification = 3;
    else
@@ -4422,9 +4583,9 @@ ccl_coding_driver (coding, source, destination, src_bytes, dst_bytes, encodep)
  {
    struct ccl_program *ccl
      = encodep ? &coding->spec.ccl.encoder : &coding->spec.ccl.decoder;
-  int result;
    unsigned char *dst = destination;
  
+  ccl->suppress_error = coding->suppress_error;
    ccl->last_block = coding->mode & CODING_MODE_LAST_BLOCK;
    if (encodep)
      {
@@ -4434,7 +4595,10 @@ ccl_coding_driver (coding, source, destination, src_bytes, dst_bytes, encodep)
        if (ccl->eol_type ==CODING_EOL_UNDECIDED)
         ccl->eol_type = CODING_EOL_LF;
        ccl->cr_consumed = coding->spec.ccl.cr_carryover;
+      ccl->eight_bit_control = coding->dst_multibyte;
      }
+  else
+    ccl->eight_bit_control = 1;
    ccl->multibyte = coding->src_multibyte;
    if (coding->spec.ccl.eight_bit_carryover[0] != 0)
      {
@@ -4456,6 +4620,13 @@ ccl_coding_driver (coding, source, destination, src_bytes, dst_bytes, encodep)
        coding->produced_char = coding->produced;
        coding->spec.ccl.cr_carryover = ccl->cr_consumed;
      }
+  else if (!ccl->eight_bit_control)
+    {
+      /* The produced bytes form a valid multibyte sequence. */
+      coding->produced_char
+       = multibyte_chars_in_text (destination, coding->produced);
+      coding->spec.ccl.eight_bit_carryover[0] = 0;
+    }
    else
      {
        /* On decoding, the destination should always multibyte.  But,
@@ -4575,7 +4746,7 @@ decode_eol_post_ccl (coding, ptr, bytes)
         {
           /* If the last character is CR, we can't handle it here
              because LF will be in the not-yet-decoded source text.
-            Recorded that the CR is not yet processed.  */
+            Record that the CR is not yet processed.  */
           coding->spec.ccl.cr_carryover = 1;
           coding->produced--;
           coding->produced_char--;
@@ -4668,15 +4839,23 @@ decode_eol_post_ccl (coding, ptr, bytes)
  int
  decode_coding (coding, source, destination, src_bytes, dst_bytes)
       struct coding_system *coding;
-     unsigned char *source, *destination;
+     const unsigned char *source;
+     unsigned char *destination;
       int src_bytes, dst_bytes;
  {
+  int extra = 0;
+
    if (coding->type == coding_type_undecided)
      detect_coding (coding, source, src_bytes);
  
    if (coding->eol_type == CODING_EOL_UNDECIDED
        && coding->type != coding_type_ccl)
-    detect_eol (coding, source, src_bytes);
+    {
+      detect_eol (coding, source, src_bytes);
+      /* We had better recover the original eol format if we
+        encounter an inconsistent eol format while decoding.  */
+      coding->mode |= CODING_MODE_INHIBIT_INCONSISTENT_EOL;
+    }
  
    coding->produced = coding->produced_char = 0;
    coding->consumed = coding->consumed_char = 0;
@@ -4708,18 +4887,24 @@ decode_coding (coding, source, destination, src_bytes, dst_bytes)
      case coding_type_ccl:
        if (coding->spec.ccl.cr_carryover)
         {
-         /* Set the CR which is not processed by the previous call of
-            decode_eol_post_ccl in DESTINATION.  */
+         /* Put the CR which was not processed by the previous call
+            of decode_eol_post_ccl in DESTINATION.  It will be
+            decoded together with the following LF by the call to
+            decode_eol_post_ccl below.  */
           *destination = '\r';
           coding->produced++;
           coding->produced_char++;
           dst_bytes--;
+         extra = coding->spec.ccl.cr_carryover;
         }
-      ccl_coding_driver (coding, source,
-                        destination + coding->spec.ccl.cr_carryover,
+      ccl_coding_driver (coding, source, destination + extra,
                          src_bytes, dst_bytes, 0);
        if (coding->eol_type != CODING_EOL_LF)
-       decode_eol_post_ccl (coding, destination, coding->produced);
+       {
+         coding->produced += extra;
+         coding->produced_char += extra;
+         decode_eol_post_ccl (coding, destination, coding->produced);
+       }
        break;
  
      default:
@@ -4734,7 +4919,7 @@ decode_coding (coding, source, destination, src_bytes, dst_bytes)
    if (coding->mode & CODING_MODE_LAST_BLOCK
        && coding->result == CODING_FINISH_INSUFFICIENT_SRC)
      {
-      unsigned char *src = source + coding->consumed;
+      const unsigned char *src = source + coding->consumed;
        unsigned char *dst = destination + coding->produced;
  
        src_bytes -= coding->consumed;
@@ -4768,7 +4953,8 @@ decode_coding (coding, source, destination, src_bytes, dst_bytes)
  int
  encode_coding (coding, source, destination, src_bytes, dst_bytes)
       struct coding_system *coding;
-     unsigned char *source, *destination;
+     const unsigned char *source;
+     unsigned char *destination;
       int src_bytes, dst_bytes;
  {
    coding->produced = coding->produced_char = 0;
@@ -4810,8 +4996,7 @@ encode_coding (coding, source, destination, src_bytes, dst_bytes)
    if (coding->mode & CODING_MODE_LAST_BLOCK
        && coding->result == CODING_FINISH_INSUFFICIENT_SRC)
      {
-      unsigned char *src = source + coding->consumed;
-      unsigned char *src_end = src + src_bytes;
+      const unsigned char *src = source + coding->consumed;
        unsigned char *dst = destination + coding->produced;
  
        if (coding->type == coding_type_iso2022)
@@ -4822,7 +5007,7 @@ encode_coding (coding, source, destination, src_bytes, dst_bytes)
         {
           int len = src_bytes - coding->consumed;
  
-         BCOPY_SHORT (source + coding->consumed, dst, len);
+         BCOPY_SHORT (src, dst, len);
           if (coding->src_multibyte)
             len = str_as_unibyte (dst, len);
           dst += len;
@@ -4968,7 +5153,7 @@ shrink_decoding_region (beg, end, coding, str)
         case CODING_CATEGORY_IDX_ISO_7:
         case CODING_CATEGORY_IDX_ISO_7_TIGHT:
           {
-           /* We can skip all charactes at the tail except for 8-bit
+           /* We can skip all characters at the tail except for 8-bit
                codes and ESC and the following 2-byte at the tail.  */
             unsigned char *eight_bit = NULL;
  
@@ -5032,7 +5217,7 @@ shrink_encoding_region (beg, end, coding, str)
    if (coding->type == coding_type_ccl
        || coding->eol_type == CODING_EOL_CRLF
        || coding->eol_type == CODING_EOL_CR
-      || coding->cmp_data && coding->cmp_data->used > 0)
+      || (coding->cmp_data && coding->cmp_data->used > 0))
      {
        /* We can't skip any data.  */
        return;
@@ -5057,7 +5242,7 @@ shrink_encoding_region (beg, end, coding, str)
         if (!NILP (CHAR_TABLE_REF (translation_table, i)))
           break;
        if (i < 128)
-       /* Some ASCII character should be tranlsated.  We give up
+       /* Some ASCII character should be translated.  We give up
            shrinking.  */
         return;
      }
@@ -5086,7 +5271,7 @@ shrink_encoding_region (beg, end, coding, str)
         break;
        if (coding->flags & CODING_FLAG_ISO_DESIGNATE_AT_BOL)
         {
-         unsigned char *bol = begp; 
+         unsigned char *bol = begp;
           while (begp < endp && *begp < 0x80)
             {
               begp++;
@@ -5136,10 +5321,11 @@ static int shrink_conversion_region_threshhold = 1024;
    } while (0)
  
  static Lisp_Object
-code_convert_region_unwind (dummy)
-     Lisp_Object dummy;
+code_convert_region_unwind (arg)
+     Lisp_Object arg;
  {
    inhibit_pre_post_conversion = 0;
+  Vlast_coding_system_used = arg;
    return Qnil;
  }
  
@@ -5195,7 +5381,7 @@ coding_save_composition (coding, from, to, obj)
               else if (VECTORP (val) || STRINGP (val))
                 {
                   int len = (VECTORP (val)
-                            ? XVECTOR (val)->size : XSTRING (val)->size);
+                            ? XVECTOR (val)->size : SCHARS (val));
                   int i;
                   for (i = 0; i < len; i++)
                     {
@@ -5223,7 +5409,7 @@ coding_save_composition (coding, from, to, obj)
  }
  
  /* Reflect the saved information about compositions to OBJ.
-   CODING->cmp_data points to a memory block for the informaiton.  OBJ
+   CODING->cmp_data points to a memory block for the information.  OBJ
     is a buffer or a string, defaults to the current buffer.  */
  
  void
@@ -5257,6 +5443,9 @@ coding_restore_composition (coding, obj)
               int len = data[0] - 4, j;
               Lisp_Object args[MAX_COMPOSITION_COMPONENTS * 2 - 1];
  
+             if (method == COMPOSITION_WITH_RULE_ALTCHARS
+                 && len % 2 == 0)
+               len --;
               for (j = 0; j < len; j++)
                 args[j] = make_number (data[4 + j]);
               components = (method == COMPOSITION_WITH_ALTCHARS
@@ -5282,7 +5471,7 @@ coding_restore_composition (coding, obj)
     replace_range (insdel.c) to know what we are doing.
  
     If REPLACE is zero, it is assumed that the source text is unibyte.
-   Otherwize, it is assumed that the source text is multibyte.  */
+   Otherwise, it is assumed that the source text is multibyte.  */
  
  int
  code_convert_region (from, from_byte, to, to_byte, coding, encodep, replace)
@@ -5290,6 +5479,7 @@ code_convert_region (from, from_byte, to, to_byte, coding, encodep, replace)
       struct coding_system *coding;
  {
    int len = to - from, len_byte = to_byte - from_byte;
+  int nchars_del = 0, nbytes_del = 0;
    int require, inserted, inserted_byte;
    int head_skip, tail_skip, total_skip = 0;
    Lisp_Object saved_coding_symbol;
@@ -5301,7 +5491,7 @@ code_convert_region (from, from_byte, to, to_byte, coding, encodep, replace)
    int multibyte_p = !NILP (current_buffer->enable_multibyte_characters);
  
    deletion = Qnil;
-  saved_coding_symbol = Qnil;
+  saved_coding_symbol = coding->symbol;
  
    if (from < PT && PT < to)
      {
@@ -5348,17 +5538,20 @@ code_convert_region (from, from_byte, to, to_byte, coding, encodep, replace)
                  encodings again in vain.  */
               coding->type = coding_type_emacs_mule;
               coding->category_idx = CODING_CATEGORY_IDX_EMACS_MULE;
+             /* As the emacs-mule decoder will handle composition, we
+                need this setting to allocate coding->cmp_data
+                later.  */
+             coding->composing = COMPOSITION_NO;
             }
         }
        if (coding->eol_type == CODING_EOL_UNDECIDED
           && coding->type != coding_type_ccl)
         {
-         saved_coding_symbol = coding->symbol;
           detect_eol (coding, BYTE_POS_ADDR (from_byte), len_byte);
           if (coding->eol_type == CODING_EOL_UNDECIDED)
             coding->eol_type = CODING_EOL_LF;
           /* We had better recover the original eol format if we
-            encounter an inconsitent eol format while decoding.  */
+            encounter an inconsistent eol format while decoding.  */
           coding->mode |= CODING_MODE_INHIBIT_INCONSISTENT_EOL;
         }
      }
@@ -5375,9 +5568,9 @@ code_convert_region (from, from_byte, to, to_byte, coding, encodep, replace)
           new buffer.  */
        struct buffer *prev = current_buffer;
        Lisp_Object new;
-      int count = specpdl_ptr - specpdl;
  
-      record_unwind_protect (code_convert_region_unwind, Qnil);
+      record_unwind_protect (code_convert_region_unwind,
+                            Vlast_coding_system_used);
        /* We should not call any more pre-write/post-read-conversion
           functions while this pre-write-conversion is running.  */
        inhibit_pre_post_conversion = 1;
@@ -5410,7 +5603,15 @@ code_convert_region (from, from_byte, to, to_byte, coding, encodep, replace)
      }
  
    if (replace)
-    deletion = make_buffer_string_both (from, from_byte, to, to_byte, 1);
+    {
+      if (! EQ (current_buffer->undo_list, Qt))
+       deletion = make_buffer_string_both (from, from_byte, to, to_byte, 1);
+      else
+       {
+         nchars_del = to - from;
+         nbytes_del = to_byte - from_byte;
+       }
+    }
  
    if (coding->composing != COMPOSITION_DISABLED)
      {
@@ -5448,7 +5649,7 @@ code_convert_region (from, from_byte, to, to_byte, coding, encodep, replace)
        len -= total_skip; len_byte -= total_skip;
      }
  
-  /* For converion, we must put the gap before the text in addition to
+  /* For conversion, we must put the gap before the text in addition to
       making the gap larger for efficient decoding.  The required gap
       size starts from 2000 which is the magic number used in make_gap.
       But, after one batch of conversion, it will be incremented if we
@@ -5499,7 +5700,11 @@ code_convert_region (from, from_byte, to, to_byte, coding, encodep, replace)
        if (encodep)
         result = encode_coding (coding, src, dst, len_byte, 0);
        else
-       result = decode_coding (coding, src, dst, len_byte, 0);
+       {
+         if (coding->composing != COMPOSITION_DISABLED)
+           coding->cmp_data->char_offset = from + inserted;
+         result = decode_coding (coding, src, dst, len_byte, 0);
+       }
  
        /* The buffer memory is now:
          +--------+-------converted-text----+--+------original-text----+---+
@@ -5582,7 +5787,7 @@ code_convert_region (from, from_byte, to, to_byte, coding, encodep, replace)
             coding->symbol = XVECTOR (eol_type)->contents[CODING_EOL_LF];
           else
             coding->symbol = saved_coding_symbol;
-         
+
           continue;
         }
        if (len_byte <= 0)
@@ -5597,10 +5802,26 @@ code_convert_region (from, from_byte, to, to_byte, coding, encodep, replace)
         {
           /* The source text ends in invalid codes.  Let's just
              make them valid buffer contents, and finish conversion.  */
-         inserted += len_byte;
-         inserted_byte += len_byte;
-         while (len_byte--)
-           *dst++ = *src++;
+         if (multibyte_p)
+           {
+             unsigned char *start = dst;
+
+             inserted += len_byte;
+             while (len_byte--)
+               {
+                 int c = *src++;
+                 dst += CHAR_STRING (c, dst);
+               }
+
+             inserted_byte += dst - start;
+           }
+         else
+           {
+             inserted += len_byte;
+             inserted_byte += len_byte;
+             while (len_byte--)
+               *dst++ = *src++;
+           }
           break;
         }
        if (result == CODING_FINISH_INTERRUPT)
@@ -5618,7 +5839,7 @@ code_convert_region (from, from_byte, to, to_byte, coding, encodep, replace)
        if (first)
         {
           /* We have just done the first batch of conversion which was
-            stoped because of insufficient gap.  Let's reconsider the
+            stopped because of insufficient gap.  Let's reconsider the
              required gap size (i.e. SRT - DST) now.
  
              We have converted ORIG bytes (== coding->consumed) into
@@ -5627,9 +5848,19 @@ code_convert_region (from, from_byte, to, to_byte, coding, encodep, replace)
                 REQUIRE + LEN_BYTE = LEN_BYTE * (NEW / ORIG)
                 REQUIRE = LEN_BYTE * (NEW - ORIG) / ORIG
              Here, we are sure that NEW >= ORIG.  */
-         float ratio = coding->produced - coding->consumed;
-         ratio /= coding->consumed;
-         require = len_byte * ratio;
+         float ratio;
+
+         if (coding->produced <= coding->consumed)
+           {
+             /* This happens because of CCL-based coding system with
+                eol-type CRLF.  */
+             require = 0;
+           }
+         else
+           {
+             ratio = (coding->produced - coding->consumed) / coding->consumed;
+             require = len_byte * ratio;
+           }
           first = 0;
         }
        if ((src - dst) < (require + 2000))
@@ -5667,7 +5898,7 @@ code_convert_region (from, from_byte, to, to_byte, coding, encodep, replace)
        inserted_byte = str_to_multibyte (GPT_ADDR, GAP_SIZE, inserted_byte);
      }
  
-  /* If we have shrinked the conversion area, adjust it now.  */ 
+  /* If we shrank the conversion area, adjust it now.  */
    if (total_skip > 0)
      {
        if (tail_skip > 0)
@@ -5682,7 +5913,11 @@ code_convert_region (from, from_byte, to, to_byte, coding, encodep, replace)
      }
  
    prev_Z = Z;
-  adjust_after_replace (from, from_byte, deletion, inserted, inserted_byte);
+  if (! EQ (current_buffer->undo_list, Qt))
+    adjust_after_replace (from, from_byte, deletion, inserted, inserted_byte);
+  else
+    adjust_after_replace_noundo (from, from_byte, nchars_del, nbytes_del,
+                                inserted, inserted_byte);
    inserted = Z - prev_Z;
  
    if (!encodep && coding->cmp_data && coding->cmp_data->used)
@@ -5693,20 +5928,25 @@ code_convert_region (from, from_byte, to, to_byte, coding, encodep, replace)
        && ! encodep && ! NILP (coding->post_read_conversion))
      {
        Lisp_Object val;
-      int count = specpdl_ptr - specpdl;
+      Lisp_Object saved_coding_system;
  
        if (from != PT)
         TEMP_SET_PT_BOTH (from, from_byte);
        prev_Z = Z;
-      record_unwind_protect (code_convert_region_unwind, Qnil);
+      record_unwind_protect (code_convert_region_unwind,
+                            Vlast_coding_system_used);
+      saved_coding_system = Vlast_coding_system_used;
+      Vlast_coding_system_used = coding->symbol;
        /* We should not call any more pre-write/post-read-conversion
           functions while this post-read-conversion is running.  */
        inhibit_pre_post_conversion = 1;
        val = call1 (coding->post_read_conversion, make_number (inserted));
        inhibit_pre_post_conversion = 0;
+      coding->symbol = Vlast_coding_system_used;
+      Vlast_coding_system_used = saved_coding_system;
        /* Discard the unwind protect.  */
        specpdl_ptr--;
-      CHECK_NUMBER (val, 0);
+      CHECK_NUMBER (val);
        inserted += Z - prev_Z;
      }
  
@@ -5741,33 +5981,53 @@ run_pre_post_conversion_on_str (str, coding, encodep)
       struct coding_system *coding;
       int encodep;
  {
-  int count = specpdl_ptr - specpdl;
-  struct gcpro gcpro1;
-  struct buffer *prev = current_buffer;
+  int count = SPECPDL_INDEX ();
+  struct gcpro gcpro1, gcpro2;
    int multibyte = STRING_MULTIBYTE (str);
+  Lisp_Object buffer;
+  struct buffer *buf;
+  Lisp_Object old_deactivate_mark;
  
    record_unwind_protect (Fset_buffer, Fcurrent_buffer ());
-  record_unwind_protect (code_convert_region_unwind, Qnil);
-  GCPRO1 (str);
-  temp_output_buffer_setup (" *code-converting-work*");
-  set_buffer_internal (XBUFFER (Vstandard_output));
+  record_unwind_protect (code_convert_region_unwind,
+                        Vlast_coding_system_used);
+  /* It is not crucial to specbind this.  */
+  old_deactivate_mark = Vdeactivate_mark;
+  GCPRO2 (str, old_deactivate_mark);
+
+  buffer = Fget_buffer_create (build_string (" *code-converting-work*"));
+  buf = XBUFFER (buffer);
+
+  delete_all_overlays (buf);
+  buf->directory = current_buffer->directory;
+  buf->read_only = Qnil;
+  buf->filename = Qnil;
+  buf->undo_list = Qt;
+  eassert (buf->overlays_before == NULL);
+  eassert (buf->overlays_after == NULL);
+
+  set_buffer_internal (buf);
    /* We must insert the contents of STR as is without
       unibyte<->multibyte conversion.  For that, we adjust the
       multibyteness of the working buffer to that of STR.  */
    Ferase_buffer ();
-  current_buffer->enable_multibyte_characters = multibyte ? Qt : Qnil;
+  buf->enable_multibyte_characters = multibyte ? Qt : Qnil;
+
    insert_from_string (str, 0, 0,
-                     XSTRING (str)->size, STRING_BYTES (XSTRING (str)), 0);
+                     SCHARS (str), SBYTES (str), 0);
    UNGCPRO;
    inhibit_pre_post_conversion = 1;
    if (encodep)
      call2 (coding->pre_write_conversion, make_number (BEG), make_number (Z));
    else
      {
+      Vlast_coding_system_used = coding->symbol;
        TEMP_SET_PT_BOTH (BEG, BEG_BYTE);
        call1 (coding->post_read_conversion, make_number (Z - BEG));
+      coding->symbol = Vlast_coding_system_used;
      }
    inhibit_pre_post_conversion = 0;
+  Vdeactivate_mark = old_deactivate_mark;
    str = make_buffer_string (BEG, Z, 1);
    return unbind_to (count, str);
  }
@@ -5780,8 +6040,7 @@ decode_coding_string (str, coding, nocopy)
  {
    int len;
    struct conversion_buffer buf;
-  int from, to, to_byte;
-  struct gcpro gcpro1;
+  int from, to_byte;
    Lisp_Object saved_coding_symbol;
    int result;
    int require_decoding;
@@ -5790,10 +6049,9 @@ decode_coding_string (str, coding, nocopy)
    int consumed, consumed_char, produced, produced_char;
  
    from = 0;
-  to = XSTRING (str)->size;
-  to_byte = STRING_BYTES (XSTRING (str));
+  to_byte = SBYTES (str);
  
-  saved_coding_symbol = Qnil;
+  saved_coding_symbol = coding->symbol;
    coding->src_multibyte = STRING_MULTIBYTE (str);
    coding->dst_multibyte = 1;
    if (CODING_REQUIRE_DETECTION (coding))
@@ -5801,19 +6059,26 @@ decode_coding_string (str, coding, nocopy)
        /* See the comments in code_convert_region.  */
        if (coding->type == coding_type_undecided)
         {
-         detect_coding (coding, XSTRING (str)->data, to_byte);
+         detect_coding (coding, SDATA (str), to_byte);
           if (coding->type == coding_type_undecided)
-           coding->type = coding_type_emacs_mule;
+           {
+             coding->type = coding_type_emacs_mule;
+             coding->category_idx = CODING_CATEGORY_IDX_EMACS_MULE;
+             /* As the emacs-mule decoder will handle composition, we
+                need this setting to allocate coding->cmp_data
+                later.  */
+             coding->composing = COMPOSITION_NO;
+           }
         }
        if (coding->eol_type == CODING_EOL_UNDECIDED
           && coding->type != coding_type_ccl)
         {
           saved_coding_symbol = coding->symbol;
-         detect_eol (coding, XSTRING (str)->data, to_byte);
+         detect_eol (coding, SDATA (str), to_byte);
           if (coding->eol_type == CODING_EOL_UNDECIDED)
             coding->eol_type = CODING_EOL_LF;
           /* We had better recover the original eol format if we
-            encounter an inconsitent eol format while decoding.  */
+            encounter an inconsistent eol format while decoding.  */
           coding->mode |= CODING_MODE_INHIBIT_INCONSISTENT_EOL;
         }
      }
@@ -5828,7 +6093,7 @@ decode_coding_string (str, coding, nocopy)
      {
        /* Decoding routines expect the source text to be unibyte.  */
        str = Fstring_as_unibyte (str);
-      to_byte = STRING_BYTES (XSTRING (str));
+      to_byte = SBYTES (str);
        nocopy = 1;
        coding->src_multibyte = 0;
      }
@@ -5836,24 +6101,26 @@ decode_coding_string (str, coding, nocopy)
    /* Try to skip the heading and tailing ASCIIs.  */
    if (require_decoding && coding->type != coding_type_ccl)
      {
-      SHRINK_CONVERSION_REGION (&from, &to_byte, coding, XSTRING (str)->data,
+      SHRINK_CONVERSION_REGION (&from, &to_byte, coding, SDATA (str),
                                 0);
        if (from == to_byte)
         require_decoding = 0;
-      shrinked_bytes = from + (STRING_BYTES (XSTRING (str)) - to_byte);
+      shrinked_bytes = from + (SBYTES (str) - to_byte);
      }
  
-  if (!require_decoding)
+  if (!require_decoding
+      && !(SYMBOLP (coding->post_read_conversion)
+          && !NILP (Ffboundp (coding->post_read_conversion))))
      {
-      coding->consumed = STRING_BYTES (XSTRING (str));
-      coding->consumed_char = XSTRING (str)->size;
+      coding->consumed = SBYTES (str);
+      coding->consumed_char = SCHARS (str);
        if (coding->dst_multibyte)
         {
           str = Fstring_as_multibyte (str);
           nocopy = 1;
         }
-      coding->produced = STRING_BYTES (XSTRING (str));
-      coding->produced_char = XSTRING (str)->size;
+      coding->produced = SBYTES (str);
+      coding->produced_char = SCHARS (str);
        return (nocopy ? str : Fcopy_sequence (str));
      }
  
@@ -5865,7 +6132,7 @@ decode_coding_string (str, coding, nocopy)
    consumed = consumed_char = produced = produced_char = 0;
    while (1)
      {
-      result = decode_coding (coding, XSTRING (str)->data + from + consumed,
+      result = decode_coding (coding, SDATA (str) + from + consumed,
                               buf.data + produced, to_byte - from - consumed,
                               buf.size - produced);
        consumed += coding->consumed;
@@ -5882,6 +6149,8 @@ decode_coding_string (str, coding, nocopy)
         extend_conversion_buffer (&buf);
        else if (result == CODING_FINISH_INCONSISTENT_EOL)
         {
+         Lisp_Object eol_type;
+
           /* Recover the original EOL format.  */
           if (coding->eol_type == CODING_EOL_CR)
             {
@@ -5904,9 +6173,20 @@ decode_coding_string (str, coding, nocopy)
                 }
               produced += num_eol;
               produced_char += num_eol;
-           } 
+           }
+         /* Suppress eol-format conversion in the further conversion.  */
           coding->eol_type = CODING_EOL_LF;
-         coding->symbol = saved_coding_symbol;
+
+         /* Set the coding system symbol to that for Unix-like EOL.  */
+         eol_type = Fget (saved_coding_symbol, Qeol_type);
+         if (VECTORP (eol_type)
+             && XVECTOR (eol_type)->size == 3
+             && SYMBOLP (XVECTOR (eol_type)->contents[CODING_EOL_LF]))
+           coding->symbol = XVECTOR (eol_type)->contents[CODING_EOL_LF];
+         else
+           coding->symbol = saved_coding_symbol;
+
+
         }
      }
  
@@ -5921,12 +6201,12 @@ decode_coding_string (str, coding, nocopy)
    else
      newstr = make_uninit_string (produced + shrinked_bytes);
    if (from > 0)
-    bcopy (XSTRING (str)->data, XSTRING (newstr)->data, from);
-  bcopy (buf.data, XSTRING (newstr)->data + from, produced);
+    STRING_COPYIN (newstr, 0, SDATA (str), from);
+  STRING_COPYIN (newstr, from, buf.data, produced);
    if (shrinked_bytes > from)
-    bcopy (XSTRING (str)->data + to_byte,
-          XSTRING (newstr)->data + from + produced,
-          shrinked_bytes - from);
+    STRING_COPYIN (newstr, from + produced,
+                  SDATA (str) + to_byte,
+                  shrinked_bytes - from);
    free_conversion_buffer (&buf);
  
    if (coding->cmp_data && coding->cmp_data->used)
@@ -5949,8 +6229,6 @@ encode_coding_string (str, coding, nocopy)
    int len;
    struct conversion_buffer buf;
    int from, to, to_byte;
-  struct gcpro gcpro1;
-  Lisp_Object saved_coding_symbol;
    int result;
    int shrinked_bytes = 0;
    Lisp_Object newstr;
@@ -5961,10 +6239,8 @@ encode_coding_string (str, coding, nocopy)
      str = run_pre_post_conversion_on_str (str, coding, 1);
  
    from = 0;
-  to = XSTRING (str)->size;
-  to_byte = STRING_BYTES (XSTRING (str));
-
-  saved_coding_symbol = Qnil;
+  to = SCHARS (str);
+  to_byte = SBYTES (str);
  
    /* Encoding routines determine the multibyteness of the source text
       by coding->src_multibyte.  */
@@ -5972,15 +6248,15 @@ encode_coding_string (str, coding, nocopy)
    coding->dst_multibyte = 0;
    if (! CODING_REQUIRE_ENCODING (coding))
      {
-      coding->consumed = STRING_BYTES (XSTRING (str));
-      coding->consumed_char = XSTRING (str)->size;
+      coding->consumed = SBYTES (str);
+      coding->consumed_char = SCHARS (str);
        if (STRING_MULTIBYTE (str))
         {
           str = Fstring_as_unibyte (str);
           nocopy = 1;
         }
-      coding->produced = STRING_BYTES (XSTRING (str));
-      coding->produced_char = XSTRING (str)->size;
+      coding->produced = SBYTES (str);
+      coding->produced_char = SCHARS (str);
        return (nocopy ? str : Fcopy_sequence (str));
      }
  
@@ -5990,11 +6266,11 @@ encode_coding_string (str, coding, nocopy)
    /* Try to skip the heading and tailing ASCIIs.  */
    if (coding->type != coding_type_ccl)
      {
-      SHRINK_CONVERSION_REGION (&from, &to_byte, coding, XSTRING (str)->data,
+      SHRINK_CONVERSION_REGION (&from, &to_byte, coding, SDATA (str),
                                 1);
        if (from == to_byte)
         return (nocopy ? str : Fcopy_sequence (str));
-      shrinked_bytes = from + (STRING_BYTES (XSTRING (str)) - to_byte);
+      shrinked_bytes = from + (SBYTES (str) - to_byte);
      }
  
    len = encoding_buffer_size (coding, to_byte - from);
@@ -6003,7 +6279,7 @@ encode_coding_string (str, coding, nocopy)
    consumed = consumed_char = produced = produced_char = 0;
    while (1)
      {
-      result = encode_coding (coding, XSTRING (str)->data + from + consumed,
+      result = encode_coding (coding, SDATA (str) + from + consumed,
                               buf.data + produced, to_byte - from - consumed,
                               buf.size - produced);
        consumed += coding->consumed;
@@ -6025,12 +6301,12 @@ encode_coding_string (str, coding, nocopy)
  
    newstr = make_uninit_string (produced + shrinked_bytes);
    if (from > 0)
-    bcopy (XSTRING (str)->data, XSTRING (newstr)->data, from);
-  bcopy (buf.data, XSTRING (newstr)->data + from, produced);
+    STRING_COPYIN (newstr, 0, SDATA (str), from);
+  STRING_COPYIN (newstr, from, buf.data, produced);
    if (shrinked_bytes > from)
-    bcopy (XSTRING (str)->data + to_byte,
-          XSTRING (newstr)->data + from + produced,
-          shrinked_bytes - from);
+    STRING_COPYIN (newstr, from + produced,
+                  SDATA (str) + to_byte,
+                  shrinked_bytes - from);
  
    free_conversion_buffer (&buf);
    coding_free_composition_data (coding);
@@ -6043,10 +6319,10 @@ encode_coding_string (str, coding, nocopy)
  /*** 8. Emacs Lisp library functions ***/
  
  DEFUN ("coding-system-p", Fcoding_system_p, Scoding_system_p, 1, 1, 0,
-  "Return t if OBJECT is nil or a coding-system.\n\
-See the documentation of `make-coding-system' for information\n\
-about coding-system objects.")
-  (obj)
+       doc: /* Return t if OBJECT is nil or a coding-system.
+See the documentation of `make-coding-system' for information
+about coding-system objects.  */)
+     (obj)
       Lisp_Object obj;
  {
    if (NILP (obj))
@@ -6061,8 +6337,8 @@ about coding-system objects.")
  
  DEFUN ("read-non-nil-coding-system", Fread_non_nil_coding_system,
         Sread_non_nil_coding_system, 1, 1, 0,
-  "Read a coding system from the minibuffer, prompting with string PROMPT.")
-  (prompt)
+       doc: /* Read a coding system from the minibuffer, prompting with string PROMPT.  */)
+     (prompt)
       Lisp_Object prompt;
  {
    Lisp_Object val;
@@ -6071,35 +6347,35 @@ DEFUN ("read-non-nil-coding-system", Fread_non_nil_coding_system,
        val = Fcompleting_read (prompt, Vcoding_system_alist, Qnil,
                               Qt, Qnil, Qcoding_system_history, Qnil, Qnil);
      }
-  while (XSTRING (val)->size == 0);
+  while (SCHARS (val) == 0);
    return (Fintern (val, Qnil));
  }
  
  DEFUN ("read-coding-system", Fread_coding_system, Sread_coding_system, 1, 2, 0,
-  "Read a coding system from the minibuffer, prompting with string PROMPT.\n\
-If the user enters null input, return second argument DEFAULT-CODING-SYSTEM.")
-  (prompt, default_coding_system)
+       doc: /* Read a coding system from the minibuffer, prompting with string PROMPT.
+If the user enters null input, return second argument DEFAULT-CODING-SYSTEM.  */)
+     (prompt, default_coding_system)
       Lisp_Object prompt, default_coding_system;
  {
    Lisp_Object val;
    if (SYMBOLP (default_coding_system))
-    XSETSTRING (default_coding_system, XSYMBOL (default_coding_system)->name);
+    default_coding_system = SYMBOL_NAME (default_coding_system);
    val = Fcompleting_read (prompt, Vcoding_system_alist, Qnil,
                           Qt, Qnil, Qcoding_system_history,
                           default_coding_system, Qnil);
-  return (XSTRING (val)->size == 0 ? Qnil : Fintern (val, Qnil));
+  return (SCHARS (val) == 0 ? Qnil : Fintern (val, Qnil));
  }
  
  DEFUN ("check-coding-system", Fcheck_coding_system, Scheck_coding_system,
         1, 1, 0,
-  "Check validity of CODING-SYSTEM.\n\
-If valid, return CODING-SYSTEM, else signal a `coding-system-error' error.\n\
-It is valid if it is a symbol with a non-nil `coding-system' property.\n\
-The value of property should be a vector of length 5.")
-  (coding_system)
+       doc: /* Check validity of CODING-SYSTEM.
+If valid, return CODING-SYSTEM, else signal a `coding-system-error' error.
+It is valid if it is a symbol with a non-nil `coding-system' property.
+The value of property should be a vector of length 5.  */)
+     (coding_system)
       Lisp_Object coding_system;
  {
-  CHECK_SYMBOL (coding_system, 0);
+  CHECK_SYMBOL (coding_system);
    if (!NILP (Fcoding_system_p (coding_system)))
      return coding_system;
    while (1)
@@ -6108,7 +6384,7 @@ The value of property should be a vector of length 5.")
  \f
  Lisp_Object
  detect_coding_system (src, src_bytes, highest, multibytep)
-     unsigned char *src;
+     const unsigned char *src;
       int src_bytes, highest;
       int multibytep;
  {
@@ -6163,31 +6439,35 @@ detect_coding_system (src, src_bytes, highest, multibytep)
           Lisp_Object eol;
           eol = Fget (XCAR (tmp), Qeol_type);
           if (VECTORP (eol))
-           XCAR (tmp) = XVECTOR (eol)->contents[eol_type];
+           XSETCAR (tmp, XVECTOR (eol)->contents[eol_type]);
         }
      }
    return (highest ? XCAR (val) : val);
-}  
+}
  
  DEFUN ("detect-coding-region", Fdetect_coding_region, Sdetect_coding_region,
         2, 3, 0,
-  "Detect coding system of the text in the region between START and END.\n\
-Return a list of possible coding systems ordered by priority.\n\
-\n\
-If only ASCII characters are found, it returns a list of single element\n\
-`undecided' or its subsidiary coding system according to a detected\n\
-end-of-line format.\n\
-\n\
-If optional argument HIGHEST is non-nil, return the coding system of\n\
-highest priority.")
-  (start, end, highest)
+       doc: /* Detect how the byte sequence in the region is encoded.
+Return a list of possible coding systems used on decoding a byte
+sequence containing the bytes in the region between START and END when
+the coding system `undecided' is specified.  The list is ordered by
+priority decided in the current language environment.
+
+If only ASCII characters are found, it returns a list of single element
+`undecided' or its subsidiary coding system according to a detected
+end-of-line format.
+
+If optional argument HIGHEST is non-nil, return the coding system of
+highest priority.  */)
+     (start, end, highest)
       Lisp_Object start, end, highest;
  {
    int from, to;
    int from_byte, to_byte;
+  int include_anchor_byte = 0;
  
-  CHECK_NUMBER_COERCE_MARKER (start, 0);
-  CHECK_NUMBER_COERCE_MARKER (end, 1);
+  CHECK_NUMBER_COERCE_MARKER (start);
+  CHECK_NUMBER_COERCE_MARKER (end);
  
    validate_region (&start, &end);
    from = XINT (start), to = XINT (end);
@@ -6196,9 +6476,17 @@ highest priority.")
  
    if (from < GPT && to >= GPT)
      move_gap_both (to, to_byte);
+  /* If an anchor byte `\0' follows the region, we include it in
+     the detecting source.  Then code detectors can handle the trailing
+     byte sequence more accurately.
  
+     Fix me: This is not a perfect solution.  It is better that we
+     add one more argument, say LAST_BLOCK, to all detect_coding_XXX.
+  */
+  if (to == Z || (to == GPT && GAP_SIZE > 0))
+    include_anchor_byte = 1;
    return detect_coding_system (BYTE_POS_ADDR (from_byte),
-                              to_byte - from_byte,
+                              to_byte - from_byte + include_anchor_byte,
                                !NILP (highest),
                                !NILP (current_buffer
                                       ->enable_multibyte_characters));
@@ -6206,47 +6494,37 @@ highest priority.")
  
  DEFUN ("detect-coding-string", Fdetect_coding_string, Sdetect_coding_string,
         1, 2, 0,
-  "Detect coding system of the text in STRING.\n\
-Return a list of possible coding systems ordered by priority.\n\
-\n\
-If only ASCII characters are found, it returns a list of single element\n\
-`undecided' or its subsidiary coding system according to a detected\n\
-end-of-line format.\n\
-\n\
-If optional argument HIGHEST is non-nil, return the coding system of\n\
-highest priority.")
-  (string, highest)
+       doc: /* Detect how the byte sequence in STRING is encoded.
+Return a list of possible coding systems used on decoding a byte
+sequence containing the bytes in STRING when the coding system
+`undecided' is specified.  The list is ordered by priority decided in
+the current language environment.
+
+If only ASCII characters are found, it returns a list of single element
+`undecided' or its subsidiary coding system according to a detected
+end-of-line format.
+
+If optional argument HIGHEST is non-nil, return the coding system of
+highest priority.  */)
+     (string, highest)
       Lisp_Object string, highest;
  {
-  CHECK_STRING (string, 0);
-
-  return detect_coding_system (XSTRING (string)->data,
-                              STRING_BYTES (XSTRING (string)),
+  CHECK_STRING (string);
+
+  return detect_coding_system (SDATA (string),
+                              /* "+ 1" is to include the anchor byte
+                                 `\0'.  With this, code detectors can
+                                 handle the trailing bytes more
+                                 accurately.  */
+                              SBYTES (string) + 1,
                                !NILP (highest),
                                STRING_MULTIBYTE (string));
  }
  
-/* Return an intersection of lists L1 and L2.  */
-
-static Lisp_Object
-intersection (l1, l2)
-     Lisp_Object l1, l2;
-{
-  Lisp_Object val;
-
-  for (val = Qnil; CONSP (l1); l1 = XCDR (l1))
-    {
-      if (!NILP (Fmemq (XCAR (l1), l2)))
-       val = Fcons (XCAR (l1), val);
-    }
-  return val;
-}
-
-
  /*  Subroutine for Fsafe_coding_systems_region_internal.
  
      Return a list of coding systems that safely encode the multibyte
-    text between P and PEND.  SAFE_CODINGS, if non-nil, is a list of
+    text between P and PEND.  SAFE_CODINGS, if non-nil, is an alist of
      possible coding systems.  If it is nil, it means that we have not
      yet found any coding systems.
  
@@ -6262,8 +6540,9 @@ find_safe_codings (p, pend, safe_codings, work_table, single_byte_char_found)
       Lisp_Object safe_codings, work_table;
       int *single_byte_char_found;
  {
-  int c, len, idx;
-  Lisp_Object val;
+  int c, len;
+  Lisp_Object val, ch;
+  Lisp_Object prev, tail;
  
    while (p < pend)
      {
@@ -6275,58 +6554,123 @@ find_safe_codings (p, pend, safe_codings, work_table, single_byte_char_found)
        if (SINGLE_BYTE_CHAR_P (c))
         *single_byte_char_found = 1;
        if (NILP (safe_codings))
+       /* Already all coding systems are excluded.  But, we can't
+          terminate the loop here because non-ASCII single-byte char
+          must be found.  */
         continue;
        /* Check the safe coding systems for C.  */
-      val = char_table_ref_and_index (work_table, c, &idx);
+      ch = make_number (c);
+      val = Faref (work_table, ch);
        if (EQ (val, Qt))
         /* This element was already checked.  Ignore it.  */
         continue;
        /* Remember that we checked this element.  */
-      CHAR_TABLE_SET (work_table, make_number (idx), Qt);
+      Faset (work_table, ch, Qt);
  
-      /* If there are some safe coding systems for C and we have
-        already found the other set of coding systems for the
-        different characters, get the intersection of them.  */
-      if (!EQ (safe_codings, Qt) && !NILP (val))
-       val = intersection (safe_codings, val);
-      safe_codings = val;
+      for (prev = tail = safe_codings; CONSP (tail); tail = XCDR (tail))
+       {
+         Lisp_Object elt, translation_table, hash_table, accept_latin_extra;
+         int encodable;
+
+         elt = XCAR (tail);
+         if (CONSP (XCDR (elt)))
+           {
+             /* This entry has this format now:
+                ( CODING SAFE-CHARS TRANSLATION-TABLE HASH-TABLE
+                         ACCEPT-LATIN-EXTRA ) */
+             val = XCDR (elt);
+             encodable = ! NILP (Faref (XCAR (val), ch));
+             if (! encodable)
+               {
+                 val = XCDR (val);
+                 translation_table = XCAR (val);
+                 hash_table = XCAR (XCDR (val));
+                 accept_latin_extra = XCAR (XCDR (XCDR (val)));
+               }
+           }
+         else
+           {
+             /* This entry has this format now: ( CODING . SAFE-CHARS) */
+             encodable = ! NILP (Faref (XCDR (elt), ch));
+             if (! encodable)
+               {
+                 /* Transform the format to:
+                    ( CODING SAFE-CHARS TRANSLATION-TABLE HASH-TABLE
+                      ACCEPT-LATIN-EXTRA )  */
+                 val = Fget (XCAR (elt), Qcoding_system);
+                 translation_table
+                   = Fplist_get (AREF (val, 3),
+                                 Qtranslation_table_for_encode);
+                 if (SYMBOLP (translation_table))
+                   translation_table = Fget (translation_table,
+                                             Qtranslation_table);
+                 hash_table
+                   = (CHAR_TABLE_P (translation_table)
+                      ? XCHAR_TABLE (translation_table)->extras[1]
+                      : Qnil);
+                 accept_latin_extra
+                   = ((EQ (AREF (val, 0), make_number (2))
+                       && VECTORP (AREF (val, 4)))
+                      ? AREF (AREF (val, 4), 16)
+                      : Qnil);
+                 XSETCAR (tail, list5 (XCAR (elt), XCDR (elt),
+                                       translation_table, hash_table,
+                                       accept_latin_extra));
+               }
+           }
+             
+         if (! encodable
+             && ((CHAR_TABLE_P (translation_table)
+                  && ! NILP (Faref (translation_table, ch)))
+                 || (HASH_TABLE_P (hash_table)
+                     && ! NILP (Fgethash (ch, hash_table, Qnil)))
+                 || (SINGLE_BYTE_CHAR_P (c)
+                     && ! NILP (accept_latin_extra)
+                     && VECTORP (Vlatin_extra_code_table)
+                     && ! NILP (AREF (Vlatin_extra_code_table, c)))))
+           encodable = 1;
+         if (encodable)
+           prev = tail;
+         else
+           {
+             /* Exclude this coding system from SAFE_CODINGS.  */
+             if (EQ (tail, safe_codings))
+               safe_codings = XCDR (safe_codings);
+             else
+               XSETCDR (prev, XCDR (tail));
+           }
+       }
      }
    return safe_codings;
  }
  
-
-/* Return a list of coding systems that safely encode the text between
-   START and END.  If the text contains only ASCII or is unibyte,
-   return t.  */
-
  DEFUN ("find-coding-systems-region-internal",
         Ffind_coding_systems_region_internal,
         Sfind_coding_systems_region_internal, 2, 2, 0,
-  "Internal use only.")
-  (start, end)
+       doc: /* Internal use only.  */)
+     (start, end)
       Lisp_Object start, end;
  {
    Lisp_Object work_table, safe_codings;
    int non_ascii_p = 0;
    int single_byte_char_found = 0;
-  unsigned char *p1, *p1end, *p2, *p2end, *p;
-  Lisp_Object args[2];
+  const unsigned char *p1, *p1end, *p2, *p2end, *p;
  
    if (STRINGP (start))
      {
        if (!STRING_MULTIBYTE (start))
         return Qt;
-      p1 = XSTRING (start)->data, p1end = p1 + STRING_BYTES (XSTRING (start));
+      p1 = SDATA (start), p1end = p1 + SBYTES (start);
        p2 = p2end = p1end;
-      if (XSTRING (start)->size != STRING_BYTES (XSTRING (start)))
+      if (SCHARS (start) != SBYTES (start))
         non_ascii_p = 1;
      }
    else
      {
        int from, to, stop;
  
-      CHECK_NUMBER_COERCE_MARKER (start, 0);
-      CHECK_NUMBER_COERCE_MARKER (end, 1);
+      CHECK_NUMBER_COERCE_MARKER (start);
+      CHECK_NUMBER_COERCE_MARKER (end);
        if (XINT (start) < BEG || XINT (end) > Z || XINT (start) > XINT (end))
         args_out_of_range (start, end);
        if (NILP (current_buffer->enable_multibyte_characters))
@@ -6351,34 +6695,182 @@ DEFUN ("find-coding-systems-region-internal",
        for (p = p1; p < p1end && ASCII_BYTE_P (*p); p++);
        if (p == p1end)
         {
-         for (p = p2; p < p2end && ASCII_BYTE_P (*p); p++);      
+         for (p = p2; p < p2end && ASCII_BYTE_P (*p); p++);
           if (p == p2end)
             return Qt;
         }
      }
  
    /* The text contains non-ASCII characters.  */
-  work_table = Fcopy_sequence (Vchar_coding_system_table);
-  safe_codings = find_safe_codings (p1, p1end, Qt, work_table,
+
+  work_table = Fmake_char_table (Qchar_coding_system, Qnil);
+  safe_codings = Fcopy_sequence (XCDR (Vcoding_system_safe_chars));
+
+  safe_codings = find_safe_codings (p1, p1end, safe_codings, work_table,
                                     &single_byte_char_found);
    if (p2 < p2end)
      safe_codings = find_safe_codings (p2, p2end, safe_codings, work_table,
                                       &single_byte_char_found);
+  if (EQ (safe_codings, XCDR (Vcoding_system_safe_chars)))
+    safe_codings = Qt;
+  else
+    {
+      /* Turn safe_codings to a list of coding systems... */
+      Lisp_Object val;
+
+      if (single_byte_char_found)
+       /* ... and append these for eight-bit chars.  */
+       val = Fcons (Qraw_text,
+                    Fcons (Qemacs_mule, Fcons (Qno_conversion, Qnil)));
+      else
+       /* ... and append generic coding systems.  */
+       val = Fcopy_sequence (XCAR (Vcoding_system_safe_chars));
+
+      for (; CONSP (safe_codings); safe_codings = XCDR (safe_codings))
+       val = Fcons (XCAR (XCAR (safe_codings)), val);
+      safe_codings = val;
+    }
+
+  return safe_codings;
+}
+
+
+/* Search from position POS for characters that are unencodable
+   according to SAFE_CHARS, and return a list of their positions.  P
+   points to where in memory the character at POS is stored.  Limit the
+   search to PEND, or stop once N unencodable characters are found.
+
+   If SAFE_CHARS is a char table, an element for an unencodable
+   character is nil.
+
+   If SAFE_CHARS is nil, all non-ASCII characters are unencodable.
  
-  if (!single_byte_char_found)
+   Otherwise, SAFE_CHARS is t, and only eight-bit-control and
+   eight-bit-graphic characters are unencodable.  */
+
+static Lisp_Object
+unencodable_char_position (safe_chars, pos, p, pend, n)
+     Lisp_Object safe_chars;
+     int pos;
+     unsigned char *p, *pend;
+     int n;
+{
+  Lisp_Object pos_list;
+
+  pos_list = Qnil;
+  while (p < pend)
      {
-      /* Append generic coding systems.  */
-      Lisp_Object args[2];
-      args[0] = safe_codings;
-      args[1] = Fchar_table_extra_slot (Vchar_coding_system_table,
-                                       make_number (0));
-      safe_codings = Fappend (2, args);
+      int len;
+      int c = STRING_CHAR_AND_LENGTH (p, MAX_MULTIBYTE_LENGTH, len);
+
+      if (c >= 128
+         && (CHAR_TABLE_P (safe_chars)
+             ? NILP (CHAR_TABLE_REF (safe_chars, c))
+             : (NILP (safe_chars) || c < 256)))
+       {
+         pos_list = Fcons (make_number (pos), pos_list);
+         if (--n <= 0)
+           break;
+       }
+      pos++;
+      p += len;
+    }
+  return Fnreverse (pos_list);
+}
+
+
+DEFUN ("unencodable-char-position", Funencodable_char_position,
+       Sunencodable_char_position, 3, 5, 0,
+       doc: /*
+Return position of first un-encodable character in a region.
+START and END specify the region and CODING-SYSTEM specifies the
+encoding to check.  Return nil if CODING-SYSTEM does encode the region.
+
+If optional 4th argument COUNT is non-nil, it specifies at most how
+many un-encodable characters to search.  In this case, the value is a
+list of positions.
+
+If optional 5th argument STRING is non-nil, it is a string to search
+for un-encodable characters.  In that case, START and END are indexes
+to the string.  */)
+     (start, end, coding_system, count, string)
+     Lisp_Object start, end, coding_system, count, string;
+{
+  int n;
+  Lisp_Object safe_chars;
+  struct coding_system coding;
+  Lisp_Object positions;
+  int from, to;
+  unsigned char *p, *pend;
+
+  if (NILP (string))
+    {
+      validate_region (&start, &end);
+      from = XINT (start);
+      to = XINT (end);
+      if (NILP (current_buffer->enable_multibyte_characters))
+       return Qnil;
+      p = CHAR_POS_ADDR (from);
+      if (to == GPT)
+       pend = GPT_ADDR;
+      else
+       pend = CHAR_POS_ADDR (to);
      }
    else
-    safe_codings = Fcons (Qraw_text,
-                         Fcons (Qemacs_mule,
-                                Fcons (Qno_conversion, safe_codings)));
-  return safe_codings;
+    {
+      CHECK_STRING (string);
+      CHECK_NATNUM (start);
+      CHECK_NATNUM (end);
+      from = XINT (start);
+      to = XINT (end);
+      if (from > to
+         || to > SCHARS (string))
+       args_out_of_range_3 (string, start, end);
+      if (! STRING_MULTIBYTE (string))
+       return Qnil;
+      p = SDATA (string) + string_char_to_byte (string, from);
+      pend = SDATA (string) + string_char_to_byte (string, to);
+    }
+
+  setup_coding_system (Fcheck_coding_system (coding_system), &coding);
+
+  if (NILP (count))
+    n = 1;
+  else
+    {
+      CHECK_NATNUM (count);
+      n = XINT (count);
+    }
+
+  if (coding.type == coding_type_no_conversion
+      || coding.type == coding_type_raw_text)
+    return Qnil;
+
+  if (coding.type == coding_type_undecided)
+    safe_chars = Qnil;
+  else
+    safe_chars = coding_safe_chars (coding_system);
+
+  if (STRINGP (string)
+      || from >= GPT || to <= GPT)
+    positions = unencodable_char_position (safe_chars, from, p, pend, n);
+  else
+    {
+      Lisp_Object args[2];
+
+      args[0] = unencodable_char_position (safe_chars, from, p, GPT_ADDR, n);
+      n -= XINT (Flength (args[0]));
+      if (n <= 0)
+       positions = args[0];
+      else
+       {
+         args[1] = unencodable_char_position (safe_chars, GPT, GAP_END_ADDR,
+                                              pend, n);
+         positions = Fappend (2, args);
+       }
+    }
+
+  return  (NILP (count) ? Fcar (positions) : positions);
  }
  
  
@@ -6388,11 +6880,11 @@ code_convert_region1 (start, end, coding_system, encodep)
       int encodep;
  {
    struct coding_system coding;
-  int from, to, len;
+  int from, to;
  
-  CHECK_NUMBER_COERCE_MARKER (start, 0);
-  CHECK_NUMBER_COERCE_MARKER (end, 1);
-  CHECK_SYMBOL (coding_system, 2);
+  CHECK_NUMBER_COERCE_MARKER (start);
+  CHECK_NUMBER_COERCE_MARKER (end);
+  CHECK_SYMBOL (coding_system);
  
    validate_region (&start, &end);
    from = XFASTINT (start);
@@ -6402,7 +6894,7 @@ code_convert_region1 (start, end, coding_system, encodep)
      return make_number (to - from);
  
    if (setup_coding_system (Fcheck_coding_system (coding_system), &coding) < 0)
-    error ("Invalid coding system: %s", XSYMBOL (coding_system)->name->data);
+    error ("Invalid coding system: %s", SDATA (SYMBOL_NAME (coding_system)));
  
    coding.mode |= CODING_MODE_LAST_BLOCK;
    coding.src_multibyte = coding.dst_multibyte
@@ -6415,14 +6907,14 @@ code_convert_region1 (start, end, coding_system, encodep)
  
  DEFUN ("decode-coding-region", Fdecode_coding_region, Sdecode_coding_region,
         3, 3, "r\nzCoding system: ",
-  "Decode the current region by specified coding system.\n\
-When called from a program, takes three arguments:\n\
-START, END, and CODING-SYSTEM.  START and END are buffer positions.\n\
-This function sets `last-coding-system-used' to the precise coding system\n\
-used (which may be different from CODING-SYSTEM if CODING-SYSTEM is\n\
-not fully specified.)\n\
-It returns the length of the decoded text.")
-  (start, end, coding_system)
+       doc: /* Decode the current region from the specified coding system.
+When called from a program, takes three arguments:
+START, END, and CODING-SYSTEM.  START and END are buffer positions.
+This function sets `last-coding-system-used' to the precise coding system
+used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
+not fully specified.)
+It returns the length of the decoded text.  */)
+     (start, end, coding_system)
       Lisp_Object start, end, coding_system;
  {
    return code_convert_region1 (start, end, coding_system, 0);
@@ -6430,14 +6922,14 @@ It returns the length of the decoded text.")
  
  DEFUN ("encode-coding-region", Fencode_coding_region, Sencode_coding_region,
         3, 3, "r\nzCoding system: ",
-  "Encode the current region by specified coding system.\n\
-When called from a program, takes three arguments:\n\
-START, END, and CODING-SYSTEM.  START and END are buffer positions.\n\
-This function sets `last-coding-system-used' to the precise coding system\n\
-used (which may be different from CODING-SYSTEM if CODING-SYSTEM is\n\
-not fully specified.)\n\
-It returns the length of the encoded text.")
-  (start, end, coding_system)
+       doc: /* Encode the current region into the specified coding system.
+When called from a program, takes three arguments:
+START, END, and CODING-SYSTEM.  START and END are buffer positions.
+This function sets `last-coding-system-used' to the precise coding system
+used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
+not fully specified.)
+It returns the length of the encoded text.  */)
+     (start, end, coding_system)
       Lisp_Object start, end, coding_system;
  {
    return code_convert_region1 (start, end, coding_system, 1);
@@ -6450,14 +6942,14 @@ code_convert_string1 (string, coding_system, nocopy, encodep)
  {
    struct coding_system coding;
  
-  CHECK_STRING (string, 0);
-  CHECK_SYMBOL (coding_system, 1);
+  CHECK_STRING (string);
+  CHECK_SYMBOL (coding_system);
  
    if (NILP (coding_system))
      return (NILP (nocopy) ? Fcopy_sequence (string) : string);
  
    if (setup_coding_system (Fcheck_coding_system (coding_system), &coding) < 0)
-    error ("Invalid coding system: %s", XSYMBOL (coding_system)->name->data);
+    error ("Invalid coding system: %s", SDATA (SYMBOL_NAME (coding_system)));
  
    coding.mode |= CODING_MODE_LAST_BLOCK;
    string = (encodep
@@ -6470,13 +6962,13 @@ code_convert_string1 (string, coding_system, nocopy, encodep)
  
  DEFUN ("decode-coding-string", Fdecode_coding_string, Sdecode_coding_string,
         2, 3, 0,
-  "Decode STRING which is encoded in CODING-SYSTEM, and return the result.\n\
-Optional arg NOCOPY non-nil means it is ok to return STRING itself\n\
-if the decoding operation is trivial.\n\
-This function sets `last-coding-system-used' to the precise coding system\n\
-used (which may be different from CODING-SYSTEM if CODING-SYSTEM is\n\
-not fully specified.)")
-  (string, coding_system, nocopy)
+       doc: /* Decode STRING which is encoded in CODING-SYSTEM, and return the result.
+Optional arg NOCOPY non-nil means it is OK to return STRING itself
+if the decoding operation is trivial.
+This function sets `last-coding-system-used' to the precise coding system
+used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
+not fully specified.)  */)
+     (string, coding_system, nocopy)
       Lisp_Object string, coding_system, nocopy;
  {
    return code_convert_string1 (string, coding_system, nocopy, 0);
@@ -6484,13 +6976,13 @@ not fully specified.)")
  
  DEFUN ("encode-coding-string", Fencode_coding_string, Sencode_coding_string,
         2, 3, 0,
-  "Encode STRING to CODING-SYSTEM, and return the result.\n\
-Optional arg NOCOPY non-nil means it is ok to return STRING itself\n\
-if the encoding operation is trivial.\n\
-This function sets `last-coding-system-used' to the precise coding system\n\
-used (which may be different from CODING-SYSTEM if CODING-SYSTEM is\n\
-not fully specified.)")
-  (string, coding_system, nocopy)
+       doc: /* Encode STRING to CODING-SYSTEM, and return the result.
+Optional arg NOCOPY non-nil means it is OK to return STRING itself
+if the encoding operation is trivial.
+This function sets `last-coding-system-used' to the precise coding system
+used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
+not fully specified.)  */)
+     (string, coding_system, nocopy)
       Lisp_Object string, coding_system, nocopy;
  {
    return code_convert_string1 (string, coding_system, nocopy, 1);
@@ -6509,14 +7001,14 @@ code_convert_string_norecord (string, coding_system, encodep)
  {
    struct coding_system coding;
  
-  CHECK_STRING (string, 0);
-  CHECK_SYMBOL (coding_system, 1);
+  CHECK_STRING (string);
+  CHECK_SYMBOL (coding_system);
  
    if (NILP (coding_system))
      return string;
  
    if (setup_coding_system (Fcheck_coding_system (coding_system), &coding) < 0)
-    error ("Invalid coding system: %s", XSYMBOL (coding_system)->name->data);
+    error ("Invalid coding system: %s", SDATA (SYMBOL_NAME (coding_system)));
  
    coding.composing = COMPOSITION_DISABLED;
    coding.mode |= CODING_MODE_LAST_BLOCK;
@@ -6526,15 +7018,15 @@ code_convert_string_norecord (string, coding_system, encodep)
  }
  \f
  DEFUN ("decode-sjis-char", Fdecode_sjis_char, Sdecode_sjis_char, 1, 1, 0,
-  "Decode a Japanese character which has CODE in shift_jis encoding.\n\
-Return the corresponding character.")
-  (code)
+       doc: /* Decode a Japanese character which has CODE in shift_jis encoding.
+Return the corresponding character.  */)
+     (code)
       Lisp_Object code;
  {
    unsigned char c1, c2, s1, s2;
    Lisp_Object val;
  
-  CHECK_NUMBER (code, 0);
+  CHECK_NUMBER (code);
    s1 = (XFASTINT (code)) >> 8, s2 = (XFASTINT (code)) & 0xFF;
    if (s1 == 0)
      {
@@ -6547,7 +7039,7 @@ Return the corresponding character.")
      }
    else
      {
-      if ((s1 < 0x80 || s1 > 0x9F && s1 < 0xE0 || s1 > 0xEF)
+      if ((s1 < 0x80 || (s1 > 0x9F && s1 < 0xE0) || s1 > 0xEF)
           || (s2 < 0x40 || s2 == 0x7F || s2 > 0xFC))
         error ("Invalid Shift JIS code: %x", XFASTINT (code));
        DECODE_SJIS (s1, s2, c1, c2);
@@ -6557,15 +7049,15 @@ Return the corresponding character.")
  }
  
  DEFUN ("encode-sjis-char", Fencode_sjis_char, Sencode_sjis_char, 1, 1, 0,
-  "Encode a Japanese character CHAR to shift_jis encoding.\n\
-Return the corresponding code in SJIS.")
-  (ch)
+       doc: /* Encode a Japanese character CHAR to shift_jis encoding.
+Return the corresponding code in SJIS.  */)
+     (ch)
       Lisp_Object ch;
  {
    int charset, c1, c2, s1, s2;
    Lisp_Object val;
  
-  CHECK_NUMBER (ch, 0);
+  CHECK_NUMBER (ch);
    SPLIT_CHAR (XFASTINT (ch), charset, c1, c2);
    if (charset == CHARSET_ASCII)
      {
@@ -6588,16 +7080,16 @@ Return the corresponding code in SJIS.")
  }
  
  DEFUN ("decode-big5-char", Fdecode_big5_char, Sdecode_big5_char, 1, 1, 0,
-  "Decode a Big5 character which has CODE in BIG5 coding system.\n\
-Return the corresponding character.")
-  (code)
+       doc: /* Decode a Big5 character which has CODE in BIG5 coding system.
+Return the corresponding character.  */)
+     (code)
       Lisp_Object code;
  {
    int charset;
    unsigned char b1, b2, c1, c2;
    Lisp_Object val;
  
-  CHECK_NUMBER (code, 0);
+  CHECK_NUMBER (code);
    b1 = (XFASTINT (code)) >> 8, b2 = (XFASTINT (code)) & 0xFF;
    if (b1 == 0)
      {
@@ -6617,15 +7109,15 @@ Return the corresponding character.")
  }
  
  DEFUN ("encode-big5-char", Fencode_big5_char, Sencode_big5_char, 1, 1, 0,
-  "Encode the Big5 character CHAR to BIG5 coding system.\n\
-Return the corresponding character code in Big5.")
-  (ch)
+       doc: /* Encode the Big5 character CHAR to BIG5 coding system.
+Return the corresponding character code in Big5.  */)
+     (ch)
       Lisp_Object ch;
  {
    int charset, c1, c2, b1, b2;
    Lisp_Object val;
  
-  CHECK_NUMBER (ch, 0);
+  CHECK_NUMBER (ch);
    SPLIT_CHAR (XFASTINT (ch), charset, c1, c2);
    if (charset == CHARSET_ASCII)
      {
@@ -6644,64 +7136,68 @@ Return the corresponding character code in Big5.")
    return val;
  }
  \f
-DEFUN ("set-terminal-coding-system-internal",
-       Fset_terminal_coding_system_internal,
-       Sset_terminal_coding_system_internal, 1, 1, 0, "")
-  (coding_system)
+DEFUN ("set-terminal-coding-system-internal", Fset_terminal_coding_system_internal,
+       Sset_terminal_coding_system_internal, 1, 1, 0,
+       doc: /* Internal use only.  */)
+     (coding_system)
       Lisp_Object coding_system;
  {
-  CHECK_SYMBOL (coding_system, 0);
+  CHECK_SYMBOL (coding_system);
    setup_coding_system (Fcheck_coding_system (coding_system), &terminal_coding);
    /* We had better not send unsafe characters to terminal.  */
-  terminal_coding.flags |= CODING_FLAG_ISO_SAFE;
-  /* Characer composition should be disabled.  */
+  terminal_coding.mode |= CODING_MODE_INHIBIT_UNENCODABLE_CHAR;
+  /* Character composition should be disabled.  */
    terminal_coding.composing = COMPOSITION_DISABLED;
+  /* Error notification should be suppressed.  */
+  terminal_coding.suppress_error = 1;
    terminal_coding.src_multibyte = 1;
    terminal_coding.dst_multibyte = 0;
    return Qnil;
  }
  
-DEFUN ("set-safe-terminal-coding-system-internal",
-       Fset_safe_terminal_coding_system_internal,
-       Sset_safe_terminal_coding_system_internal, 1, 1, 0, "")
-  (coding_system)
+DEFUN ("set-safe-terminal-coding-system-internal", Fset_safe_terminal_coding_system_internal,
+       Sset_safe_terminal_coding_system_internal, 1, 1, 0,
+       doc: /* Internal use only.  */)
+     (coding_system)
       Lisp_Object coding_system;
  {
-  CHECK_SYMBOL (coding_system, 0);
+  CHECK_SYMBOL (coding_system);
    setup_coding_system (Fcheck_coding_system (coding_system),
                        &safe_terminal_coding);
-  /* Characer composition should be disabled.  */
+  /* Character composition should be disabled.  */
    safe_terminal_coding.composing = COMPOSITION_DISABLED;
+  /* Error notification should be suppressed for the safe coding.  */
+  safe_terminal_coding.suppress_error = 1;
    safe_terminal_coding.src_multibyte = 1;
    safe_terminal_coding.dst_multibyte = 0;
    return Qnil;
  }
  
-DEFUN ("terminal-coding-system",
-       Fterminal_coding_system, Sterminal_coding_system, 0, 0, 0,
-  "Return coding system specified for terminal output.")
-  ()
+DEFUN ("terminal-coding-system", Fterminal_coding_system,
+       Sterminal_coding_system, 0, 0, 0,
+       doc: /* Return coding system specified for terminal output.  */)
+     ()
  {
    return terminal_coding.symbol;
  }
  
-DEFUN ("set-keyboard-coding-system-internal",
-       Fset_keyboard_coding_system_internal,
-       Sset_keyboard_coding_system_internal, 1, 1, 0, "")
-  (coding_system)
+DEFUN ("set-keyboard-coding-system-internal", Fset_keyboard_coding_system_internal,
+       Sset_keyboard_coding_system_internal, 1, 1, 0,
+       doc: /* Internal use only.  */)
+     (coding_system)
       Lisp_Object coding_system;
  {
-  CHECK_SYMBOL (coding_system, 0);
+  CHECK_SYMBOL (coding_system);
    setup_coding_system (Fcheck_coding_system (coding_system), &keyboard_coding);
-  /* Characer composition should be disabled.  */
+  /* Character composition should be disabled.  */
    keyboard_coding.composing = COMPOSITION_DISABLED;
    return Qnil;
  }
  
-DEFUN ("keyboard-coding-system",
-       Fkeyboard_coding_system, Skeyboard_coding_system, 0, 0, 0,
-  "Return coding system specified for decoding keyboard input.")
-  ()
+DEFUN ("keyboard-coding-system", Fkeyboard_coding_system,
+       Skeyboard_coding_system, 0, 0, 0,
+       doc: /* Return coding system specified for decoding keyboard input.  */)
+     ()
  {
    return keyboard_coding.symbol;
  }
@@ -6709,35 +7205,37 @@ DEFUN ("keyboard-coding-system",
  \f
  DEFUN ("find-operation-coding-system", Ffind_operation_coding_system,
         Sfind_operation_coding_system,  1, MANY, 0,
-  "Choose a coding system for an operation based on the target name.\n\
-The value names a pair of coding systems: (DECODING-SYSTEM . ENCODING-SYSTEM).\n\
-DECODING-SYSTEM is the coding system to use for decoding\n\
-\(in case OPERATION does decoding), and ENCODING-SYSTEM is the coding system\n\
-for encoding (in case OPERATION does encoding).\n\
-\n\
-The first argument OPERATION specifies an I/O primitive:\n\
-  For file I/O, `insert-file-contents' or `write-region'.\n\
-  For process I/O, `call-process', `call-process-region', or `start-process'.\n\
-  For network I/O, `open-network-stream'.\n\
-\n\
-The remaining arguments should be the same arguments that were passed\n\
-to the primitive.  Depending on which primitive, one of those arguments\n\
-is selected as the TARGET.  For example, if OPERATION does file I/O,\n\
-whichever argument specifies the file name is TARGET.\n\
-\n\
-TARGET has a meaning which depends on OPERATION:\n\
-  For file I/O, TARGET is a file name.\n\
-  For process I/O, TARGET is a process name.\n\
-  For network I/O, TARGET is a service name or a port number\n\
-\n\
-This function looks up what specified for TARGET in,\n\
-`file-coding-system-alist', `process-coding-system-alist',\n\
-or `network-coding-system-alist' depending on OPERATION.\n\
-They may specify a coding system, a cons of coding systems,\n\
-or a function symbol to call.\n\
-In the last case, we call the function with one argument,\n\
-which is a list of all the arguments given to this function.")
-  (nargs, args)
+       doc: /* Choose a coding system for an operation based on the target name.
+The value names a pair of coding systems: (DECODING-SYSTEM . ENCODING-SYSTEM).
+DECODING-SYSTEM is the coding system to use for decoding
+\(in case OPERATION does decoding), and ENCODING-SYSTEM is the coding system
+for encoding (in case OPERATION does encoding).
+
+The first argument OPERATION specifies an I/O primitive:
+  For file I/O, `insert-file-contents' or `write-region'.
+  For process I/O, `call-process', `call-process-region', or `start-process'.
+  For network I/O, `open-network-stream'.
+
+The remaining arguments should be the same arguments that were passed
+to the primitive.  Depending on which primitive, one of those arguments
+is selected as the TARGET.  For example, if OPERATION does file I/O,
+whichever argument specifies the file name is TARGET.
+
+TARGET has a meaning which depends on OPERATION:
+  For file I/O, TARGET is a file name.
+  For process I/O, TARGET is a process name.
+  For network I/O, TARGET is a service name or a port number.
+
+This function looks up what is specified for TARGET in
+`file-coding-system-alist', `process-coding-system-alist',
+or `network-coding-system-alist' depending on OPERATION.
+They may specify a coding system, a cons of coding systems,
+or a function symbol to call.
+In the last case, we call the function with one argument,
+which is a list of all the arguments given to this function.
+
+usage: (find-operation-coding-system OPERATION ARGUMENTS ...)  */)
+     (nargs, args)
       int nargs;
       Lisp_Object *args;
  {
@@ -6749,14 +7247,21 @@ which is a list of all the arguments given to this function.")
    operation = args[0];
    if (!SYMBOLP (operation)
        || !INTEGERP (target_idx = Fget (operation, Qtarget_idx)))
-    error ("Invalid first arguement");
+    error ("Invalid first argument");
    if (nargs < 1 + XINT (target_idx))
      error ("Too few arguments for operation: %s",
-          XSYMBOL (operation)->name->data);
+          SDATA (SYMBOL_NAME (operation)));
+  /* For write-region, if the 6th argument (i.e. VISIT, the 5th
+     argument to write-region) is a string, it must be treated as a
+     target file name.  */
+  if (EQ (operation, Qwrite_region)
+      && nargs > 5
+      && STRINGP (args[5]))
+    target_idx = make_number (4);
    target = args[XINT (target_idx) + 1];
    if (!(STRINGP (target)
         || (EQ (operation, Qopen_network_stream) && INTEGERP (target))))
-    error ("Invalid %dth argument", XINT (target_idx) + 1);
+    error ("Invalid argument %d", XINT (target_idx) + 1);
  
    chain = ((EQ (operation, Qinsert_file_contents)
             || EQ (operation, Qwrite_region))
@@ -6803,10 +7308,10 @@ which is a list of all the arguments given to this function.")
  
  DEFUN ("update-coding-systems-internal",  Fupdate_coding_systems_internal,
         Supdate_coding_systems_internal, 0, 0, 0,
-  "Update internal database for ISO2022 and CCL based coding systems.\n\
-When values of any coding categories are changed, you must\n\
-call this function")
-  ()
+       doc: /* Update internal database for ISO2022 and CCL based coding systems.
+When values of any coding categories are changed, you must
+call this function.  */)
+     ()
  {
    int i;
  
@@ -6814,7 +7319,7 @@ call this function")
      {
        Lisp_Object val;
  
-      val = XSYMBOL (XVECTOR (Vcoding_category_table)->contents[i])->value;
+      val = SYMBOL_VALUE (XVECTOR (Vcoding_category_table)->contents[i]);
        if (!NILP (val))
         {
           if (! coding_system_table[i])
@@ -6834,9 +7339,9 @@ call this function")
  
  DEFUN ("set-coding-priority-internal", Fset_coding_priority_internal,
         Sset_coding_priority_internal, 0, 0, 0,
-  "Update internal database for the current value of `coding-category-list'.\n\
-This function is internal use only.")
-  ()
+       doc: /* Update internal database for the current value of `coding-category-list'.
+This function is for internal use only.  */)
+     ()
  {
    int i = 0, idx;
    Lisp_Object val;
@@ -6862,6 +7367,40 @@ This function is internal use only.")
    return Qnil;
  }
  
+DEFUN ("define-coding-system-internal", Fdefine_coding_system_internal,
+       Sdefine_coding_system_internal, 1, 1, 0,
+       doc: /* Register CODING-SYSTEM as a base coding system.
+This function is for internal use only.  */)
+     (coding_system)
+     Lisp_Object coding_system;
+{
+  Lisp_Object safe_chars, slot;
+
+  if (NILP (Fcheck_coding_system (coding_system)))  /* Validation failed.  */
+    Fsignal (Qcoding_system_error, Fcons (coding_system, Qnil));
+  safe_chars = coding_safe_chars (coding_system);  /* Must be t or a char table.  */
+  if (! EQ (safe_chars, Qt) && ! CHAR_TABLE_P (safe_chars))
+    error ("No valid safe-chars property for %s",
+          SDATA (SYMBOL_NAME (coding_system)));
+  if (EQ (safe_chars, Qt))  /* Add to the list in the car, without duplicates.  */
+    {
+      if (NILP (Fmemq (coding_system, XCAR (Vcoding_system_safe_chars))))
+       XSETCAR (Vcoding_system_safe_chars,
+                Fcons (coding_system, XCAR (Vcoding_system_safe_chars)));
+    }
+  else  /* Add or update the alist entry kept in the cdr.  */
+    {
+      slot = Fassq (coding_system, XCDR (Vcoding_system_safe_chars));
+      if (NILP (slot))  /* No entry yet: append one at the end.  */
+       XSETCDR (Vcoding_system_safe_chars,
+                nconc2 (XCDR (Vcoding_system_safe_chars),
+                        Fcons (Fcons (coding_system, safe_chars), Qnil)));
+      else  /* Existing entry: replace its safe-chars value.  */
+       XSETCDR (slot, safe_chars);
+    }
+  return Qnil;
+}
+
  #endif /* emacs */
  
  \f
@@ -6872,7 +7411,7 @@ init_coding_once ()
  {
    int i;
  
-  /* Emacs' internal format specific initialize routine.  */ 
+  /* Emacs' internal format specific initialize routine.  */
    for (i = 0; i <= 0x20; i++)
      emacs_code_class[i] = EMACS_control_code;
    emacs_code_class[0x0A] = EMACS_linefeed_code;
@@ -7015,9 +7554,12 @@ syms_of_coding ()
        }
    }
  
+  Vcoding_system_safe_chars = Fcons (Qnil, Qnil);
+  staticpro (&Vcoding_system_safe_chars);
+
    Qtranslation_table = intern ("translation-table");
    staticpro (&Qtranslation_table);
-  Fput (Qtranslation_table, Qchar_table_extra_slots, make_number (1));
+  Fput (Qtranslation_table, Qchar_table_extra_slots, make_number (2));
  
    Qtranslation_table_id = intern ("translation-table-id");
    staticpro (&Qtranslation_table_id);
@@ -7039,7 +7581,7 @@ syms_of_coding ()
       But don't staticpro it here--that is done in alloc.c.  */
    Qchar_table_extra_slots = intern ("char-table-extra-slots");
    Fput (Qsafe_chars, Qchar_table_extra_slots, make_number (0));
-  Fput (Qchar_coding_system, Qchar_table_extra_slots, make_number (1));
+  Fput (Qchar_coding_system, Qchar_table_extra_slots, make_number (0));
  
    Qvalid_codes = intern ("valid-codes");
    staticpro (&Qvalid_codes);
@@ -7050,6 +7592,9 @@ syms_of_coding ()
    Qraw_text = intern ("raw-text");
    staticpro (&Qraw_text);
  
+  Qutf_8 = intern ("utf-8");
+  staticpro (&Qutf_8);
+
    defsubr (&Scoding_system_p);
    defsubr (&Sread_coding_system);
    defsubr (&Sread_non_nil_coding_system);
@@ -7057,6 +7602,7 @@ syms_of_coding ()
    defsubr (&Sdetect_coding_region);
    defsubr (&Sdetect_coding_string);
    defsubr (&Sfind_coding_systems_region_internal);
+  defsubr (&Sunencodable_char_position);
    defsubr (&Sdecode_coding_region);
    defsubr (&Sencode_coding_region);
    defsubr (&Sdecode_coding_string);
@@ -7073,27 +7619,33 @@ syms_of_coding ()
    defsubr (&Sfind_operation_coding_system);
    defsubr (&Supdate_coding_systems_internal);
    defsubr (&Sset_coding_priority_internal);
+  defsubr (&Sdefine_coding_system_internal);
  
    DEFVAR_LISP ("coding-system-list", &Vcoding_system_list,
-    "List of coding systems.\n\
-\n\
-Do not alter the value of this variable manually.  This variable should be\n\
-updated by the functions `make-coding-system' and\n\
-`define-coding-system-alias'.");
+              doc: /* List of coding systems.
+
+Do not alter the value of this variable manually.  This variable should be
+updated by the functions `make-coding-system' and
+`define-coding-system-alias'.  */);
    Vcoding_system_list = Qnil;
  
    DEFVAR_LISP ("coding-system-alist", &Vcoding_system_alist,
-    "Alist of coding system names.\n\
-Each element is one element list of coding system name.\n\
-This variable is given to `completing-read' as TABLE argument.\n\
-\n\
-Do not alter the value of this variable manually.  This variable should be\n\
-updated by the functions `make-coding-system' and\n\
-`define-coding-system-alias'.");
+              doc: /* Alist of coding system names.
+Each element is a one-element list containing a coding system name.
+This variable is given to `completing-read' as TABLE argument.
+
+Do not alter the value of this variable manually.  This variable should be
+updated by the functions `make-coding-system' and
+`define-coding-system-alias'.  */);
    Vcoding_system_alist = Qnil;
  
    DEFVAR_LISP ("coding-category-list", &Vcoding_category_list,
-    "List of coding-categories (symbols) ordered by priority.");
+              doc: /* List of coding-categories (symbols) ordered by priority.
+
+On detecting a coding system, Emacs tries code detection algorithms
+associated with each coding-category one by one in this order.  When
+one algorithm agrees with a byte sequence of source text, the coding
+system bound to the corresponding coding-category is selected.  */);
    {
      int i;
  
@@ -7105,193 +7657,206 @@ updated by the functions `make-coding-system' and\n\
    }
  
    DEFVAR_LISP ("coding-system-for-read", &Vcoding_system_for_read,
-    "Specify the coding system for read operations.\n\
-It is useful to bind this variable with `let', but do not set it globally.\n\
-If the value is a coding system, it is used for decoding on read operation.\n\
-If not, an appropriate element is used from one of the coding system alists:\n\
-There are three such tables, `file-coding-system-alist',\n\
-`process-coding-system-alist', and `network-coding-system-alist'.");
+              doc: /* Specify the coding system for read operations.
+It is useful to bind this variable with `let', but do not set it globally.
+If the value is a coding system, it is used for decoding on read operation.
+If not, an appropriate element is used from one of the coding system alists:
+There are three such tables, `file-coding-system-alist',
+`process-coding-system-alist', and `network-coding-system-alist'.  */);
    Vcoding_system_for_read = Qnil;
  
    DEFVAR_LISP ("coding-system-for-write", &Vcoding_system_for_write,
-    "Specify the coding system for write operations.\n\
-Programs bind this variable with `let', but you should not set it globally.\n\
-If the value is a coding system, it is used for encoding of output,\n\
-when writing it to a file and when sending it to a file or subprocess.\n\
-\n\
-If this does not specify a coding system, an appropriate element\n\
-is used from one of the coding system alists:\n\
-There are three such tables, `file-coding-system-alist',\n\
-`process-coding-system-alist', and `network-coding-system-alist'.\n\
-For output to files, if the above procedure does not specify a coding system,\n\
-the value of `buffer-file-coding-system' is used.");
+              doc: /* Specify the coding system for write operations.
+Programs bind this variable with `let', but you should not set it globally.
+If the value is a coding system, it is used for encoding of output,
+when writing it to a file and when sending it to a file or subprocess.
+
+If this does not specify a coding system, an appropriate element
+is used from one of the coding system alists:
+There are three such tables, `file-coding-system-alist',
+`process-coding-system-alist', and `network-coding-system-alist'.
+For output to files, if the above procedure does not specify a coding system,
+the value of `buffer-file-coding-system' is used.  */);
    Vcoding_system_for_write = Qnil;
  
    DEFVAR_LISP ("last-coding-system-used", &Vlast_coding_system_used,
-    "Coding system used in the latest file or process I/O.");
+              doc: /* Coding system used in the latest file or process I/O.
+Also set by `encode-coding-region', `decode-coding-region',
+`encode-coding-string' and `decode-coding-string'.  */);
    Vlast_coding_system_used = Qnil;
  
    DEFVAR_BOOL ("inhibit-eol-conversion", &inhibit_eol_conversion,
-    "*Non-nil means always inhibit code conversion of end-of-line format.\n\
-See info node `Coding Systems' and info node `Text and Binary' concerning\n\
-such conversion.");
+              doc: /* *Non-nil means always inhibit code conversion of end-of-line format.
+See info node `Coding Systems' and info node `Text and Binary' concerning
+such conversion.  */);
    inhibit_eol_conversion = 0;
  
    DEFVAR_BOOL ("inherit-process-coding-system", &inherit_process_coding_system,
-    "Non-nil means process buffer inherits coding system of process output.\n\
-Bind it to t if the process output is to be treated as if it were a file\n\
-read from some filesystem.");
+              doc: /* Non-nil means process buffer inherits coding system of process output.
+Bind it to t if the process output is to be treated as if it were a file
+read from some filesystem.  */);
    inherit_process_coding_system = 0;
  
    DEFVAR_LISP ("file-coding-system-alist", &Vfile_coding_system_alist,
-    "Alist to decide a coding system to use for a file I/O operation.\n\
-The format is ((PATTERN . VAL) ...),\n\
-where PATTERN is a regular expression matching a file name,\n\
-VAL is a coding system, a cons of coding systems, or a function symbol.\n\
-If VAL is a coding system, it is used for both decoding and encoding\n\
-the file contents.\n\
-If VAL is a cons of coding systems, the car part is used for decoding,\n\
-and the cdr part is used for encoding.\n\
-If VAL is a function symbol, the function must return a coding system\n\
-or a cons of coding systems which are used as above.\n\
-\n\
-See also the function `find-operation-coding-system'\n\
-and the variable `auto-coding-alist'.");
+              doc: /* Alist to decide a coding system to use for a file I/O operation.
+The format is ((PATTERN . VAL) ...),
+where PATTERN is a regular expression matching a file name,
+VAL is a coding system, a cons of coding systems, or a function symbol.
+If VAL is a coding system, it is used for both decoding and encoding
+the file contents.
+If VAL is a cons of coding systems, the car part is used for decoding,
+and the cdr part is used for encoding.
+If VAL is a function symbol, the function must return a coding system
+or a cons of coding systems which are used as above.  The function gets
+the arguments with which `find-operation-coding-system' was called.
+
+See also the function `find-operation-coding-system'
+and the variable `auto-coding-alist'.  */);
    Vfile_coding_system_alist = Qnil;
  
    DEFVAR_LISP ("process-coding-system-alist", &Vprocess_coding_system_alist,
-    "Alist to decide a coding system to use for a process I/O operation.\n\
-The format is ((PATTERN . VAL) ...),\n\
-where PATTERN is a regular expression matching a program name,\n\
-VAL is a coding system, a cons of coding systems, or a function symbol.\n\
-If VAL is a coding system, it is used for both decoding what received\n\
-from the program and encoding what sent to the program.\n\
-If VAL is a cons of coding systems, the car part is used for decoding,\n\
-and the cdr part is used for encoding.\n\
-If VAL is a function symbol, the function must return a coding system\n\
-or a cons of coding systems which are used as above.\n\
-\n\
-See also the function `find-operation-coding-system'.");
+    doc: /* Alist to decide a coding system to use for a process I/O operation.
+The format is ((PATTERN . VAL) ...),
+where PATTERN is a regular expression matching a program name,
+VAL is a coding system, a cons of coding systems, or a function symbol.
+If VAL is a coding system, it is used for both decoding what is received
+from the program and encoding what is sent to the program.
+If VAL is a cons of coding systems, the car part is used for decoding,
+and the cdr part is used for encoding.
+If VAL is a function symbol, the function must return a coding system
+or a cons of coding systems which are used as above.
+
+See also the function `find-operation-coding-system'.  */);
    Vprocess_coding_system_alist = Qnil;
  
    DEFVAR_LISP ("network-coding-system-alist", &Vnetwork_coding_system_alist,
-    "Alist to decide a coding system to use for a network I/O operation.\n\
-The format is ((PATTERN . VAL) ...),\n\
-where PATTERN is a regular expression matching a network service name\n\
-or is a port number to connect to,\n\
-VAL is a coding system, a cons of coding systems, or a function symbol.\n\
-If VAL is a coding system, it is used for both decoding what received\n\
-from the network stream and encoding what sent to the network stream.\n\
-If VAL is a cons of coding systems, the car part is used for decoding,\n\
-and the cdr part is used for encoding.\n\
-If VAL is a function symbol, the function must return a coding system\n\
-or a cons of coding systems which are used as above.\n\
-\n\
-See also the function `find-operation-coding-system'.");
+    doc: /* Alist to decide a coding system to use for a network I/O operation.
+The format is ((PATTERN . VAL) ...),
+where PATTERN is a regular expression matching a network service name
+or is a port number to connect to,
+VAL is a coding system, a cons of coding systems, or a function symbol.
+If VAL is a coding system, it is used for both decoding what is received
+from the network stream and encoding what is sent to the network stream.
+If VAL is a cons of coding systems, the car part is used for decoding,
+and the cdr part is used for encoding.
+If VAL is a function symbol, the function must return a coding system
+or a cons of coding systems which are used as above.
+
+See also the function `find-operation-coding-system'.  */);
    Vnetwork_coding_system_alist = Qnil;
  
    DEFVAR_LISP ("locale-coding-system", &Vlocale_coding_system,
-    "Coding system to use with system messages.");
+              doc: /* Coding system to use with system messages.
+Also used for decoding keyboard input on X Window system.  */);
    Vlocale_coding_system = Qnil;
  
    /* The eol mnemonics are reset in startup.el system-dependently.  */
    DEFVAR_LISP ("eol-mnemonic-unix", &eol_mnemonic_unix,
-    "*String displayed in mode line for UNIX-like (LF) end-of-line format.");
+              doc: /* *String displayed in mode line for UNIX-like (LF) end-of-line format.  */);
    eol_mnemonic_unix = build_string (":");
  
    DEFVAR_LISP ("eol-mnemonic-dos", &eol_mnemonic_dos,
-    "*String displayed in mode line for DOS-like (CRLF) end-of-line format.");
+              doc: /* *String displayed in mode line for DOS-like (CRLF) end-of-line format.  */);
    eol_mnemonic_dos = build_string ("\\");
  
    DEFVAR_LISP ("eol-mnemonic-mac", &eol_mnemonic_mac,
-    "*String displayed in mode line for MAC-like (CR) end-of-line format.");
+              doc: /* *String displayed in mode line for MAC-like (CR) end-of-line format.  */);
    eol_mnemonic_mac = build_string ("/");
  
    DEFVAR_LISP ("eol-mnemonic-undecided", &eol_mnemonic_undecided,
-    "*String displayed in mode line when end-of-line format is not yet determined.");
+              doc: /* *String displayed in mode line when end-of-line format is not yet determined.  */);
    eol_mnemonic_undecided = build_string (":");
  
    DEFVAR_LISP ("enable-character-translation", &Venable_character_translation,
-    "*Non-nil enables character translation while encoding and decoding.");
+              doc: /* *Non-nil enables character translation while encoding and decoding.  */);
    Venable_character_translation = Qt;
  
    DEFVAR_LISP ("standard-translation-table-for-decode",
-    &Vstandard_translation_table_for_decode,
-    "Table for translating characters while decoding.");
+              &Vstandard_translation_table_for_decode,
+              doc: /* Table for translating characters while decoding.  */);
    Vstandard_translation_table_for_decode = Qnil;
  
    DEFVAR_LISP ("standard-translation-table-for-encode",
-    &Vstandard_translation_table_for_encode,
-    "Table for translationg characters while encoding.");
+              &Vstandard_translation_table_for_encode,
+              doc: /* Table for translating characters while encoding.  */);
    Vstandard_translation_table_for_encode = Qnil;
  
    DEFVAR_LISP ("charset-revision-table", &Vcharset_revision_alist,
-    "Alist of charsets vs revision numbers.\n\
-While encoding, if a charset (car part of an element) is found,\n\
-designate it with the escape sequence identifing revision (cdr part of the element).");
+              doc: /* Alist of charsets vs revision numbers.
+While encoding, if a charset (car part of an element) is found,
+designate it with the escape sequence identifying revision (cdr part of the element).  */);
    Vcharset_revision_alist = Qnil;
  
    DEFVAR_LISP ("default-process-coding-system",
                &Vdefault_process_coding_system,
-    "Cons of coding systems used for process I/O by default.\n\
-The car part is used for decoding a process output,\n\
-the cdr part is used for encoding a text to be sent to a process.");
+              doc: /* Cons of coding systems used for process I/O by default.
+The car part is used for decoding a process output,
+the cdr part is used for encoding a text to be sent to a process.  */);
    Vdefault_process_coding_system = Qnil;
  
    DEFVAR_LISP ("latin-extra-code-table", &Vlatin_extra_code_table,
-    "Table of extra Latin codes in the range 128..159 (inclusive).\n\
-This is a vector of length 256.\n\
-If Nth element is non-nil, the existence of code N in a file\n\
-\(or output of subprocess) doesn't prevent it to be detected as\n\
-a coding system of ISO 2022 variant which has a flag\n\
-`accept-latin-extra-code' t (e.g. iso-latin-1) on reading a file\n\
-or reading output of a subprocess.\n\
-Only 128th through 159th elements has a meaning.");
+              doc: /* Table of extra Latin codes in the range 128..159 (inclusive).
+This is a vector of length 256.
+If Nth element is non-nil, the existence of code N in a file
+\(or output of subprocess) doesn't prevent it from being detected as
+a coding system of ISO 2022 variant which has a flag
+`accept-latin-extra-code' t (e.g. iso-latin-1) on reading a file
+or reading output of a subprocess.
+Only the 128th through 159th elements have a meaning.  */);
    Vlatin_extra_code_table = Fmake_vector (make_number (256), Qnil);
  
    DEFVAR_LISP ("select-safe-coding-system-function",
                &Vselect_safe_coding_system_function,
-    "Function to call to select safe coding system for encoding a text.\n\
-\n\
-If set, this function is called to force a user to select a proper\n\
-coding system which can encode the text in the case that a default\n\
-coding system used in each operation can't encode the text.\n\
-\n\
-The default value is `select-safe-coding-system' (which see).");
+              doc: /* Function to call to select safe coding system for encoding a text.
+
+If set, this function is called to force a user to select a proper
+coding system which can encode the text in the case that a default
+coding system used in each operation can't encode the text.
+
+The default value is `select-safe-coding-system' (which see).  */);
    Vselect_safe_coding_system_function = Qnil;
  
-  DEFVAR_LISP ("char-coding-system-table", &Vchar_coding_system_table,
-    "Char-table containing safe coding systems of each characters.\n\
-Each element doesn't include such generic coding systems that can\n\
-encode any characters.   They are in the first extra slot.");
-  Vchar_coding_system_table = Fmake_char_table (Qchar_coding_system, Qnil);
+  DEFVAR_BOOL ("coding-system-require-warning",
+              &coding_system_require_warning,
+              doc: /* Internal use only.
+If non-nil, on writing a file, `select-safe-coding-system-function' is
+called even if `coding-system-for-write' is non-nil.  The command
+`universal-coding-system-argument' binds this variable to t temporarily.  */);
+  coding_system_require_warning = 0;
+
  
    DEFVAR_BOOL ("inhibit-iso-escape-detection",
                &inhibit_iso_escape_detection,
-    "If non-nil, Emacs ignores ISO2022's escape sequence on code detection.\n\
-\n\
-By default, on reading a file, Emacs tries to detect how the text is\n\
-encoded.  This code detection is sensitive to escape sequences.  If\n\
-the sequence is valid as ISO2022, the code is determined as one of\n\
-the ISO2022 encodings, and the file is decoded by the corresponding\n\
-coding system (e.g. `iso-2022-7bit').\n\
-\n\
-However, there may be a case that you want to read escape sequences in\n\
-a file as is.  In such a case, you can set this variable to non-nil.\n\
-Then, as the code detection ignores any escape sequences, no file is\n\
-detected as encoded in some ISO2022 encoding.  The result is that all\n\
-escape sequences become visible in a buffer.\n\
-\n\
-The default value is nil, and it is strongly recommended not to change\n\
-it.  That is because many Emacs Lisp source files that contain\n\
-non-ASCII characters are encoded by the coding system `iso-2022-7bit'\n\
-in Emacs's distribution, and they won't be decoded correctly on\n\
-reading if you suppress escape sequence detection.\n\
-\n\
-The other way to read escape sequences in a file without decoding is\n\
-to explicitly specify some coding system that doesn't use ISO2022's\n\
-escape sequence (e.g `latin-1') on reading by \\[universal-coding-system-argument].");
+              doc: /* If non-nil, Emacs ignores ISO2022's escape sequence on code detection.
+
+By default, on reading a file, Emacs tries to detect how the text is
+encoded.  This code detection is sensitive to escape sequences.  If
+the sequence is valid as ISO2022, the code is determined as one of
+the ISO2022 encodings, and the file is decoded by the corresponding
+coding system (e.g. `iso-2022-7bit').
+
+However, there may be a case that you want to read escape sequences in
+a file as is.  In such a case, you can set this variable to non-nil.
+Then, as the code detection ignores any escape sequences, no file is
+detected as encoded in some ISO2022 encoding.  The result is that all
+escape sequences become visible in a buffer.
+
+The default value is nil, and it is strongly recommended not to change
+it.  That is because many Emacs Lisp source files that contain
+non-ASCII characters are encoded by the coding system `iso-2022-7bit'
+in Emacs's distribution, and they won't be decoded correctly on
+reading if you suppress escape sequence detection.
+
+The other way to read escape sequences in a file without decoding is
+to explicitly specify some coding system that doesn't use ISO2022's
+escape sequence (e.g. `latin-1') on reading by \\[universal-coding-system-argument].  */);
    inhibit_iso_escape_detection = 0;
+
+  DEFVAR_LISP ("translation-table-for-input", &Vtranslation_table_for_input,
+              doc: /* Char table for translating self-inserting characters.
+This is applied to the result of input methods, not their input.  See also
+`keyboard-translate-table'.  */);
+    Vtranslation_table_for_input = Qnil;
  }
  
  char *
@@ -7308,7 +7873,7 @@ emacs_strerror (error_number)
        Lisp_Object dec = code_convert_string_norecord (build_string (str),
                                                       Vlocale_coding_system,
                                                       0);
-      str = (char *) XSTRING (dec)->data;
+      str = (char *) SDATA (dec);
      }
  
    return str;
@@ -7316,3 +7881,5 @@ emacs_strerror (error_number)
  
  #endif /* emacs */
  
+/* arch-tag: 3a3a2b01-5ff6-4071-9afe-f5b808d9229d
+   (do not change this comment) */