#

[bpt/emacs.git] / src / coding.c
diff --git a/src/coding.c b/src/coding.c

index e9d9b16..534a5f7 100644 (file)
--- a/src/coding.c
+++ b/src/coding.c
@@ -301,11 +301,11 @@ Lisp_Object Qtarget_idx;
  
  Lisp_Object Vselect_safe_coding_system_function;
  
-/* Mnemonic character of each format of end-of-line.  */
-int eol_mnemonic_unix, eol_mnemonic_dos, eol_mnemonic_mac;
-/* Mnemonic character to indicate format of end-of-line is not yet
+/* Mnemonic string for each format of end-of-line.  */
+Lisp_Object eol_mnemonic_unix, eol_mnemonic_dos, eol_mnemonic_mac;
+/* Mnemonic string to indicate format of end-of-line is not yet
     decided.  */
-int eol_mnemonic_undecided;
+Lisp_Object eol_mnemonic_undecided;
  
  /* Format of end-of-line decided by system.  This is CODING_EOL_LF on
     Unix, CODING_EOL_CRLF on DOS/Windows, and CODING_EOL_CR on Mac.  */
@@ -525,33 +525,37 @@ detect_coding_emacs_mule (src, src_end)
  /*** 3. ISO2022 handlers ***/
  
  /* The following note describes the coding system ISO2022 briefly.
-   Since the intention of this note is to help in understanding of
-   the programs in this file, some parts are NOT ACCURATE or OVERLY
-   SIMPLIFIED.  For the thorough understanding, please refer to the
+   Since the intention of this note is to help understand the
+   functions in this file, some parts are NOT ACCURATE or OVERLY
+   SIMPLIFIED.  For thorough understanding, please refer to the
     original document of ISO2022.
  
     ISO2022 provides many mechanisms to encode several character sets
-   in 7-bit and 8-bit environment.  If one chooses 7-bite environment,
-   all text is encoded by codes of less than 128.  This may make the
-   encoded text a little bit longer, but the text gets more stability
-   to pass through several gateways (some of them strip off the MSB).
-
-   There are two kinds of character set: control character set and
+   in 7-bit and 8-bit environments.  For 7-bit environments, all text
+   is encoded using bytes less than 128.  This may make the encoded
+   text a little bit longer, but the text passes more easily through
+   several gateways, some of which strip off MSB (Most Significant Bit).
+ 
+   There are two kinds of character sets: control character set and
     graphic character set.  The former contains control characters such
     as `newline' and `escape' to provide control functions (control
-   functions are provided also by escape sequences).  The latter
-   contains graphic characters such as ' A' and '-'.  Emacs recognizes
+   functions are also provided by escape sequences).  The latter
+   contains graphic characters such as 'A' and '-'.  Emacs recognizes
     two control character sets and many graphic character sets.
  
     Graphic character sets are classified into one of the following
-   four classes, DIMENSION1_CHARS94, DIMENSION1_CHARS96,
-   DIMENSION2_CHARS94, DIMENSION2_CHARS96 according to the number of
-   bytes (DIMENSION) and the number of characters in one dimension
-   (CHARS) of the set.  In addition, each character set is assigned an
-   identification tag (called "final character" and denoted as <F>
-   here after) which is unique in each class.  <F> of each character
-   set is decided by ECMA(*) when it is registered in ISO.  Code range
-   of <F> is 0x30..0x7F (0x30..0x3F are for private use only).
+   four classes, according to the number of bytes (DIMENSION) and
+   number of characters in one dimension (CHARS) of the set:
+   - DIMENSION1_CHARS94
+   - DIMENSION1_CHARS96
+   - DIMENSION2_CHARS94
+   - DIMENSION2_CHARS96
+
+   In addition, each character set is assigned an identification tag,
+   unique for each set, called "final character" (denoted as <F>
+   hereafter).  The <F> of each character set is decided by ECMA(*)
+   when it is registered in ISO.  The code range of <F> is 0x30..0x7F
+   (0x30..0x3F are for private use only).
  
     Note (*): ECMA = European Computer Manufacturers Association
  
@@ -561,55 +565,61 @@ detect_coding_emacs_mule (src, src_end)
         o DIMENSION2_CHARS94 -- GB2312('A'), JISX0208('B'), ...
         o DIMENSION2_CHARS96 -- none for the moment
  
-   A code area (1byte=8bits) is divided into 4 areas, C0, GL, C1, and GR.
+   A code area (1 byte=8 bits) is divided into 4 areas, C0, GL, C1, and GR.
         C0 [0x00..0x1F] -- control character plane 0
         GL [0x20..0x7F] -- graphic character plane 0
         C1 [0x80..0x9F] -- control character plane 1
         GR [0xA0..0xFF] -- graphic character plane 1
  
     A control character set is directly designated and invoked to C0 or
-   C1 by an escape sequence.  The most common case is that ISO646's
-   control character set is designated/invoked to C0 and ISO6429's
-   control character set is designated/invoked to C1, and usually
-   these designations/invocations are omitted in a coded text.  With
-   7-bit environment, only C0 can be used, and a control character for
-   C1 is encoded by an appropriate escape sequence to fit in the
-   environment.  All control characters for C1 are defined the
-   corresponding escape sequences.
+   C1 by an escape sequence.  The most common case is that:
+   - ISO646's  control character set is designated/invoked to C0, and
+   - ISO6429's control character set is designated/invoked to C1,
+   and usually these designations/invocations are omitted in encoded
+   text.  In a 7-bit environment, only C0 can be used, and a control
+   character for C1 is encoded by an appropriate escape sequence to
+   fit into the environment.  All control characters for C1 are
+   defined to have corresponding escape sequences.
  
     A graphic character set is at first designated to one of four
     graphic registers (G0 through G3), then these graphic registers are
     invoked to GL or GR.  These designations and invocations can be
     done independently.  The most common case is that G0 is invoked to
-   GL, G1 is invoked to GR, and ASCII is designated to G0, and usually
-   these invocations and designations are omitted in a coded text.
-   With 7-bit environment, only GL can be used.
+   GL, G1 is invoked to GR, and ASCII is designated to G0.  Usually
+   these invocations and designations are omitted in encoded text.
+   In a 7-bit environment, only GL can be used.
  
-   When a graphic character set of CHARS94 is invoked to GL, code 0x20
-   and 0x7F of GL area work as control characters SPACE and DEL
-   respectively, and code 0xA0 and 0xFF of GR area should not be used.
+   When a graphic character set of CHARS94 is invoked to GL, codes
+   0x20 and 0x7F of the GL area work as control characters SPACE and
+   DEL respectively, and codes 0xA0 and 0xFF of the GR area should not
+   be used.
  
     There are two ways of invocation: locking-shift and single-shift.
     With locking-shift, the invocation lasts until the next different
-   invocation, whereas with single-shift, the invocation works only
-   for the following character and doesn't affect locking-shift.
-   Invocations are done by the following control characters or escape
-   sequences.
+   invocation, whereas with single-shift, the invocation affects the
+   following character only and doesn't affect the locking-shift
+   state.  Invocations are done by the following control characters or
+   escape sequences:
  
     ----------------------------------------------------------------------
-   function            control char    escape sequence description
+   abbrev  function                 cntrl escape seq   description
     ----------------------------------------------------------------------
-   SI  (shift-in)              0x0F    none            invoke G0 to GL
-   SO  (shift-out)             0x0E    none            invoke G1 to GL
-   LS2 (locking-shift-2)       none    ESC 'n'         invoke G2 into GL
-   LS3 (locking-shift-3)       none    ESC 'o'         invoke G3 into GL
-   SS2 (single-shift-2)                0x8E    ESC 'N'         invoke G2 into GL
-   SS3 (single-shift-3)                0x8F    ESC 'O'         invoke G3 into GL
+   SI/LS0  (shift-in)               0x0F  none         invoke G0 into GL
+   SO/LS1  (shift-out)              0x0E  none         invoke G1 into GL
+   LS2     (locking-shift-2)        none  ESC 'n'      invoke G2 into GL
+   LS3     (locking-shift-3)        none  ESC 'o'      invoke G3 into GL
+   LS1R    (locking-shift-1 right)   none  ESC '~'      invoke G1 into GR (*)
+   LS2R    (locking-shift-2 right)   none  ESC '}'      invoke G2 into GR (*)
+   LS3R    (locking-shift-3 right)   none  ESC '|'      invoke G3 into GR (*)
+   SS2     (single-shift-2)         0x8E  ESC 'N'      invoke G2 for one char
+   SS3     (single-shift-3)         0x8F  ESC 'O'      invoke G3 for one char
     ----------------------------------------------------------------------
-   The first four are for locking-shift.  Control characters for these
-   functions are defined by macros ISO_CODE_XXX in `coding.h'.
+   (*) These are not used by any known coding system.
+
+   Control characters for these functions are defined by macros
+   ISO_CODE_XXX in `coding.h'.
  
-   Designations are done by the following escape sequences.
+   Designations are done by the following escape sequences:
     ----------------------------------------------------------------------
     escape sequence     description
     ----------------------------------------------------------------------
@@ -632,40 +642,40 @@ detect_coding_emacs_mule (src, src_end)
     ----------------------------------------------------------------------
  
     In this list, "DIMENSION1_CHARS94<F>" means a graphic character set
-   of dimension 1, chars 94, and final character <F>, and etc.
+   of dimension 1, chars 94, and final character <F>, etc.
  
     Note (*): Although these designations are not allowed in ISO2022,
     Emacs accepts them on decoding, and produces them on encoding
-   CHARS96 character set in a coding system which is characterized as
+   CHARS96 character sets in a coding system which is characterized as
     7-bit environment, non-locking-shift, and non-single-shift.
  
     Note (**): If <F> is '@', 'A', or 'B', the intermediate character
-   '(' can be omitted.  We call this as "short-form" here after.
+   '(' can be omitted.  We refer to this as "short-form" hereafter.
  
     Now you may notice that there are a lot of ways for encoding the
-   same multilingual text in ISO2022.  Actually, there exists many
-   coding systems such as Compound Text (used in X's inter client
-   communication, ISO-2022-JP (used in Japanese Internet), ISO-2022-KR
-   (used in Korean Internet), EUC (Extended UNIX Code, used in Asian
+   same multilingual text in ISO2022.  Actually, there exist many
+   coding systems such as Compound Text (used in X11's inter-client
+   communication), ISO-2022-JP (used in Japanese internet), ISO-2022-KR
+   (used in Korean internet), EUC (Extended UNIX Code, used in Asian
     localized platforms), and all of these are variants of ISO2022.
  
     In addition to the above, Emacs handles two more kinds of escape
     sequences: ISO6429's direction specification and Emacs' private
     sequence for specifying character composition.
  
-   ISO6429's direction specification takes the following format:
+   ISO6429's direction specification takes the following form:
         o CSI ']'      -- end of the current direction
         o CSI '0' ']'  -- end of the current direction
         o CSI '1' ']'  -- start of left-to-right text
         o CSI '2' ']'  -- start of right-to-left text
     The control character CSI (0x9B: control sequence introducer) is
-   abbreviated to the escape sequence ESC '[' in 7-bit environment.
-   
-   Character composition specification takes the following format:
+   abbreviated to the escape sequence ESC '[' in a 7-bit environment.
+
+   Character composition specification takes the following form:
         o ESC '0' -- start character composition
         o ESC '1' -- end character composition
-   Since these are not standard escape sequences of any ISO, the use
-   of them for these meaning is restricted to Emacs only.  */
+   Since these are not standard escape sequences of any ISO standard,
+   the use of them for these meanings is restricted to Emacs only.  */
  
  enum iso_code_class_type iso_code_class[256];
  
@@ -937,9 +947,13 @@ detect_coding_iso2022 (src, src_end)
  /* Set designation state into CODING.  */
  #define DECODE_DESIGNATION(reg, dimension, chars, final_char)             \
    do {                                                                    \
-    int charset = ISO_CHARSET_TABLE (make_number (dimension),             \
-                                    make_number (chars),                  \
-                                    make_number (final_char));            \
+    int charset;                                                          \
+                                                                          \
+    if (final_char < '0' || final_char >= 128)                            \
+      goto label_invalid_code;                                            \
+    charset = ISO_CHARSET_TABLE (make_number (dimension),                 \
+                                make_number (chars),                      \
+                                make_number (final_char));                \
      if (charset >= 0                                                      \
         && (CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset) == reg \
             || coding->safe_charsets[charset]))                            \
@@ -1039,7 +1053,6 @@ decode_coding_iso2022 (coding, source, destination, src_bytes, dst_bytes)
      translation_table = Vstandard_translation_table_for_decode;
  
    coding->produced_char = 0;
-  coding->composed_chars = 0;
    coding->fake_multibyte = 0;
    while (src < src_end && (dst_bytes
                            ? (dst < adjusted_dst_end)
@@ -1597,32 +1610,52 @@ decode_coding_iso2022 (coding, source, destination, src_bytes, dst_bytes)
        dst = encode_invocation_designation (charset, coding, dst);      \
    } while (1)
  
-#define ENCODE_ISO_CHARACTER(charset, c1, c2)                  \
-  do {                                                         \
-    int c_alt, charset_alt;                                    \
-    if (!NILP (translation_table)                              \
-       && ((c_alt = translate_char (translation_table, -1,     \
-                                    charset, c1, c2))          \
-           >= 0))                                              \
-      SPLIT_CHAR (c_alt, charset_alt, c1, c2);                 \
-    else                                                       \
-      charset_alt = charset;                                   \
-    if (CHARSET_DIMENSION (charset_alt) == 1)                  \
-      {                                                                \
-       if (charset == CHARSET_ASCII                            \
-           && coding->flags & CODING_FLAG_ISO_USE_ROMAN)       \
-         charset_alt = charset_latin_jisx0201;                 \
-       ENCODE_ISO_CHARACTER_DIMENSION1 (charset_alt, c1);      \
-      }                                                                \
-    else                                                       \
-      {                                                                \
-       if (charset == charset_jisx0208                         \
-           && coding->flags & CODING_FLAG_ISO_USE_OLDJIS)      \
-         charset_alt = charset_jisx0208_1978;                  \
-       ENCODE_ISO_CHARACTER_DIMENSION2 (charset_alt, c1, c2);  \
-      }                                                                \
-    if (! COMPOSING_P (coding->composing))                     \
-      coding->consumed_char++;                                 \
+#define ENCODE_ISO_CHARACTER(charset, c1, c2)                          \
+  do {                                                                 \
+    int c_alt, charset_alt;                                            \
+    if (!NILP (translation_table)                                      \
+       && ((c_alt = translate_char (translation_table, -1,             \
+                                    charset, c1, c2))                  \
+           >= 0))                                                      \
+      SPLIT_CHAR (c_alt, charset_alt, c1, c2);                         \
+    else                                                               \
+      charset_alt = charset;                                           \
+    if (CHARSET_DEFINED_P (charset_alt))                               \
+      {                                                                        \
+       if (CHARSET_DIMENSION (charset_alt) == 1)                       \
+         {                                                             \
+           if (charset == CHARSET_ASCII                                \
+               && coding->flags & CODING_FLAG_ISO_USE_ROMAN)           \
+             charset_alt = charset_latin_jisx0201;                     \
+           ENCODE_ISO_CHARACTER_DIMENSION1 (charset_alt, c1);          \
+         }                                                             \
+       else                                                            \
+         {                                                             \
+           if (charset == charset_jisx0208                             \
+               && coding->flags & CODING_FLAG_ISO_USE_OLDJIS)          \
+             charset_alt = charset_jisx0208_1978;                      \
+           ENCODE_ISO_CHARACTER_DIMENSION2 (charset_alt, c1, c2);      \
+         }                                                             \
+      }                                                                        \
+    else                                                               \
+      {                                                                        \
+       if (coding->flags & CODING_FLAG_ISO_SEVEN_BITS)                 \
+         {                                                             \
+           *dst++ = charset & 0x7f;                                    \
+           *dst++ = c1 & 0x7f;                                         \
+           if (c2)                                                     \
+             *dst++ = c2 & 0x7f;                                       \
+         }                                                             \
+       else                                                            \
+         {                                                             \
+           *dst++ = charset;                                           \
+           *dst++ = c1;                                                \
+           if (c2)                                                     \
+             *dst++ = c2;                                              \
+         }                                                             \
+      }                                                                        \
+    if (! COMPOSING_P (coding->composing))                             \
+      coding->consumed_char++;                                         \
    } while (0)
  
  /* Produce designation and invocation codes at a place pointed by DST
@@ -1869,6 +1902,7 @@ encode_coding_iso2022 (coding, source, destination, src_bytes, dst_bytes)
        switch (emacs_code_class[c1])
         {
         case EMACS_ascii_code:
+         c2 = 0;
           ENCODE_ISO_CHARACTER (CHARSET_ASCII, c1, /* dummy */ c2);
           break;
  
@@ -1910,6 +1944,7 @@ encode_coding_iso2022 (coding, source, destination, src_bytes, dst_bytes)
  
         case EMACS_leading_code_2:
           ONE_MORE_BYTE (c2);
+         c3 = 0;
           if (c2 < 0xA0)
             {
               /* invalid sequence */
@@ -1923,6 +1958,7 @@ encode_coding_iso2022 (coding, source, destination, src_bytes, dst_bytes)
  
         case EMACS_leading_code_3:
           TWO_MORE_BYTES (c2, c3);
+         c4 = 0;
           if (c2 < 0xA0 || c3 < 0xA0)
             {
               /* invalid sequence */
@@ -1978,6 +2014,8 @@ encode_coding_iso2022 (coding, source, destination, src_bytes, dst_bytes)
           break;
  
         case EMACS_invalid_code:
+         if (coding->flags & CODING_FLAG_ISO_RESET_AT_CNTL)
+           ENCODE_RESET_PLANE_AND_REGISTER;
           *dst++ = c1;
           coding->consumed_char++;
           break;
@@ -2029,7 +2067,7 @@ encode_coding_iso2022 (coding, source, destination, src_bytes, dst_bytes)
     (character set)     (range)
     ASCII               0x00 .. 0x7F
     KATAKANA-JISX0201   0xA0 .. 0xDF
-   JISX0208 (1st byte) 0x80 .. 0x9F and 0xE0 .. 0xEF
+   JISX0208 (1st byte) 0x81 .. 0x9F and 0xE0 .. 0xEF
             (2nd byte)  0x40 .. 0x7E and 0x80 .. 0xFC
     -------------------------------
  
@@ -2092,7 +2130,7 @@ encode_coding_iso2022 (coding, source, destination, src_bytes, dst_bytes)
      if (!NILP (translation_table)                                      \
         && ((c_alt = translate_char (translation_table,                 \
                                      -1, (charset), c1, c2)) >= 0))     \
-         SPLIT_CHAR (c_alt, charset_alt, c1, c2);                      \
+      SPLIT_CHAR (c_alt, charset_alt, c1, c2);                         \
      if (charset_alt == CHARSET_ASCII || charset_alt < 0)               \
        DECODE_CHARACTER_ASCII (c1);                                     \
      else if (CHARSET_DIMENSION (charset_alt) == 1)                     \
@@ -2117,6 +2155,8 @@ encode_coding_iso2022 (coding, source, destination, src_bytes, dst_bytes)
        {                                                                \
         if (sjis_p && charset_alt == charset_katakana_jisx0201) \
           *dst++ = c1;                                          \
+       else if (sjis_p && charset_alt == charset_latin_jisx0201) \
+         *dst++ = c1 & 0x7F;                                   \
         else                                                    \
           {                                                     \
             *dst++ = charset_alt, *dst++ = c1;                  \
@@ -2126,7 +2166,8 @@ encode_coding_iso2022 (coding, source, destination, src_bytes, dst_bytes)
      else                                                       \
        {                                                                \
         c1 &= 0x7F, c2 &= 0x7F;                                 \
-       if (sjis_p && charset_alt == charset_jisx0208)          \
+       if (sjis_p && (charset_alt == charset_jisx0208          \
+                      || charset_alt == charset_jisx0208_1978))\
           {                                                     \
             unsigned char s1, s2;                               \
                                                                 \
@@ -2274,7 +2315,10 @@ decode_coding_sjis_big5 (coding, source, destination,
           coding->produced_char++;
         }
        else if (c1 < 0x80)
-       DECODE_SJIS_BIG5_CHARACTER (charset_ascii, c1, /* dummy */ c2);
+        {
+          c2 = 0;               /* avoid warning */
+          DECODE_SJIS_BIG5_CHARACTER (charset_ascii, c1, /* dummy */ c2);
+        }
        else
         {
           if (sjis_p)
@@ -2293,8 +2337,11 @@ decode_coding_sjis_big5 (coding, source, destination,
                 }
               else if (c1 < 0xE0)
                 /* SJIS -> JISX0201-Kana */
-               DECODE_SJIS_BIG5_CHARACTER (charset_katakana_jisx0201, c1,
-                                           /* dummy */ c2);
+               {
+                 c2 = 0;       /* avoid warning */
+                 DECODE_SJIS_BIG5_CHARACTER (charset_katakana_jisx0201, c1,
+                                             /* dummy */ c2);
+               }
               else
                 goto label_invalid_code_1;
             }
@@ -2539,7 +2586,11 @@ decode_eol (coding, source, destination, src_bytes, dst_bytes)
    coding->fake_multibyte = 0;
  
    if (src_bytes <= 0)
-    return result;
+    {
+      coding->produced = coding->produced_char = 0;
+      coding->consumed = coding->consumed_char = 0;
+      return result;
+    }
  
    switch (coding->eol_type)
      {
@@ -2836,7 +2887,12 @@ setup_coding_system (coding_system, coding)
    coding->mode = 0;
    coding->heading_ascii = -1;
    coding->post_read_conversion = coding->pre_write_conversion = Qnil;
+
+  if (NILP (coding_system))
+    goto label_invalid_coding_system;
+
    coding_spec = Fget (coding_system, Qcoding_system);
+
    if (!VECTORP (coding_spec)
        || XVECTOR (coding_spec)->size != 5
        || !CONSP (XVECTOR (coding_spec)->contents[3]))
@@ -2879,6 +2935,7 @@ setup_coding_system (coding_system, coding)
  
    /* Initialize remaining fields.  */
    coding->composing = 0;
+  coding->composed_chars = 0;
  
    /* Get values of coding system properties:
       `post-read-conversion', `pre-write-conversion',
@@ -2917,9 +2974,9 @@ setup_coding_system (coding_system, coding)
        bzero (coding->safe_charsets, MAX_CHARSET + 1);
        while (CONSP (val))
         {
-         if ((i = get_charset_id (XCONS (val)->car)) >= 0)
+         if ((i = get_charset_id (XCAR (val))) >= 0)
             coding->safe_charsets[i] = 1;
-         val = XCONS (val)->cdr;
+         val = XCDR (val);
         }
      }
  
@@ -2985,12 +3042,12 @@ setup_coding_system (coding_system, coding)
         val = Vcharset_revision_alist;
         while (CONSP (val))
           {
-           charset = get_charset_id (Fcar_safe (XCONS (val)->car));
+           charset = get_charset_id (Fcar_safe (XCAR (val)));
             if (charset >= 0
-               && (temp = Fcdr_safe (XCONS (val)->car), INTEGERP (temp))
+               && (temp = Fcdr_safe (XCAR (val)), INTEGERP (temp))
                 && (i = XINT (temp), (i >= 0 && (i + '@') < 128)))
               CODING_SPEC_ISO_REVISION_NUMBER (coding, charset) = i;
-           val = XCONS (val)->cdr;
+           val = XCDR (val);
           }
  
         /* Checks FLAGS[REG] (REG = 0, 1, 2 3) and decide designations.
@@ -3027,28 +3084,28 @@ setup_coding_system (coding_system, coding)
                 tail = flags[i];
  
                 coding->flags |= CODING_FLAG_ISO_DESIGNATION;
-               if (INTEGERP (XCONS (tail)->car)
-                   && (charset = XINT (XCONS (tail)->car),
+               if (INTEGERP (XCAR (tail))
+                   && (charset = XINT (XCAR (tail)),
                         CHARSET_VALID_P (charset))
-                   || (charset = get_charset_id (XCONS (tail)->car)) >= 0)
+                   || (charset = get_charset_id (XCAR (tail))) >= 0)
                   {
                     CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, i) = charset;
                     CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset) =i;
                   }
                 else
                   CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, i) = -1;
-               tail = XCONS (tail)->cdr;
+               tail = XCDR (tail);
                 while (CONSP (tail))
                   {
-                   if (INTEGERP (XCONS (tail)->car)
-                       && (charset = XINT (XCONS (tail)->car),
+                   if (INTEGERP (XCAR (tail))
+                       && (charset = XINT (XCAR (tail)),
                             CHARSET_VALID_P (charset))
-                       || (charset = get_charset_id (XCONS (tail)->car)) >= 0)
+                       || (charset = get_charset_id (XCAR (tail))) >= 0)
                       CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset)
                         = i;
-                   else if (EQ (XCONS (tail)->car, Qt))
+                   else if (EQ (XCAR (tail), Qt))
                       reg_bits |= 1 << i;
-                   tail = XCONS (tail)->cdr;
+                   tail = XCDR (tail);
                   }
               }
             else
@@ -3108,22 +3165,12 @@ setup_coding_system (coding_system, coding)
        coding->common_flags
         |= CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK;
        {
-       Lisp_Object val;
-       Lisp_Object decoder, encoder;
-
         val = XVECTOR (coding_spec)->contents[4];
-       if (CONSP  (val)
-           && SYMBOLP (XCONS (val)->car)
-           && !NILP (decoder = Fget (XCONS (val)->car, Qccl_program_idx))
-           && !NILP (decoder = Fcdr (Faref (Vccl_program_table, decoder)))
-           && SYMBOLP (XCONS (val)->cdr)
-           && !NILP (encoder = Fget (XCONS (val)->cdr, Qccl_program_idx))
-           && !NILP (encoder = Fcdr (Faref (Vccl_program_table, encoder))))
-         {
-           setup_ccl_program (&(coding->spec.ccl.decoder), decoder);
-           setup_ccl_program (&(coding->spec.ccl.encoder), encoder);
-         }
-       else
+       if (! CONSP (val)
+           || setup_ccl_program (&(coding->spec.ccl.decoder),
+                                 XCAR (val)) < 0
+           || setup_ccl_program (&(coding->spec.ccl.encoder),
+                                 XCDR (val)) < 0)
           goto label_invalid_coding_system;
  
         bzero (coding->spec.ccl.valid_codes, 256);
@@ -3132,18 +3179,18 @@ setup_coding_system (coding_system, coding)
           {
             Lisp_Object this;
  
-           for (; CONSP (val); val = XCONS (val)->cdr)
+           for (; CONSP (val); val = XCDR (val))
               {
-               this = XCONS (val)->car;
+               this = XCAR (val);
                 if (INTEGERP (this)
                     && XINT (this) >= 0 && XINT (this) < 256)
                   coding->spec.ccl.valid_codes[XINT (this)] = 1;
                 else if (CONSP (this)
-                        && INTEGERP (XCONS (this)->car)
-                        && INTEGERP (XCONS (this)->cdr))
+                        && INTEGERP (XCAR (this))
+                        && INTEGERP (XCDR (this)))
                   {
-                   int start = XINT (XCONS (this)->car);
-                   int end = XINT (XCONS (this)->cdr);
+                   int start = XINT (XCAR (this));
+                   int end = XINT (XCDR (this));
  
                     if (start >= 0 && start <= end && end < 256)
                       while (start <= end)
@@ -3196,6 +3243,7 @@ setup_raw_text_coding_system (coding)
             coding->symbol
               = XVECTOR (subsidiaries)->contents[coding->eol_type];
         }
+      setup_coding_system (coding->symbol, coding);
      }
    return;
  }
@@ -3658,7 +3706,9 @@ ccl_coding_driver (coding, source, destination, src_bytes, dst_bytes, encodep)
    coding->produced = ccl_driver (ccl, source, destination,
                                  src_bytes, dst_bytes, &(coding->consumed));
    coding->produced_char
-    = multibyte_chars_in_text (destination, coding->produced);
+    = (encodep
+       ? coding->produced
+       : multibyte_chars_in_text (destination, coding->produced));
    coding->consumed_char
      = multibyte_chars_in_text (source, coding->consumed);
  
@@ -3683,7 +3733,17 @@ ccl_coding_driver (coding, source, destination, src_bytes, dst_bytes, encodep)
  
  /* See "GENERAL NOTES about `decode_coding_XXX ()' functions".  Before
     decoding, it may detect coding system and format of end-of-line if
-   those are not yet decided.  */
+   those are not yet decided.
+
+   This function does not make full use of the DESTINATION buffer.  For
+   instance, if coding->type is coding_type_iso2022, it uses only
+   (DST_BYTES - 7) bytes of the DESTINATION buffer.  When DST_BYTES is
+   computed by the function decoding_buffer_size, it includes an extra
+   256 bytes (defined by CONVERSION_BUFFER_EXTRA_ROOM), so this
+   function can decode the full SOURCE.  Otherwise, if you want to
+   avoid carry-over, you must supply a DESTINATION buffer at least 7
+   bytes larger than the expected maximum number of bytes that will be
+   produced by this function.  */
  
  int
  decode_coding (coding, source, destination, src_bytes, dst_bytes)
@@ -3694,6 +3754,7 @@ decode_coding (coding, source, destination, src_bytes, dst_bytes)
    int result;
  
    if (src_bytes <= 0
+      && coding->type != coding_type_ccl
        && ! (coding->mode & CODING_MODE_LAST_BLOCK
             && CODING_REQUIRE_FLUSHING (coding)))
      {
@@ -3765,7 +3826,17 @@ decode_coding (coding, source, destination, src_bytes, dst_bytes)
    return result;
  }
  
-/* See "GENERAL NOTES about `encode_coding_XXX ()' functions".  */
+/* See "GENERAL NOTES about `encode_coding_XXX ()' functions".
+
+   This function does not make full use of the DESTINATION buffer.
+   For instance, if coding->type is coding_type_iso2022, it uses only
+   (DST_BYTES - 20) bytes of DESTINATION.  When DST_BYTES is computed
+   by the function encoding_buffer_size, it already includes 256
+   extra bytes (defined by CONVERSION_BUFFER_EXTRA_ROOM), so this
+   function can encode the full SOURCE.  Otherwise, to avoid carry
+   over, you must supply a DESTINATION buffer at least 20 bytes larger
+   than the maximum number of bytes this function is expected to
+   produce.  */
  
  int
  encode_coding (coding, source, destination, src_bytes, dst_bytes)
@@ -4310,6 +4381,7 @@ code_convert_region (from, from_byte, to, to_byte, coding, encodep, replace)
        move_gap_both (from, from_byte);
      SHRINK_CONVERSION_REGION (&from_byte, &to_byte, coding, NULL, encodep);
      if (from_byte == to_byte
+       && coding->type != coding_type_ccl
         && ! (coding->mode & CODING_MODE_LAST_BLOCK
               && CODING_REQUIRE_FLUSHING (coding)))
        {
@@ -4360,10 +4432,10 @@ code_convert_region (from, from_byte, to, to_byte, coding, encodep, replace)
    ZV_BYTE -= len_byte;
    Z_BYTE -= len_byte;
  
-  if (GPT - BEG < beg_unchanged)
-    beg_unchanged = GPT - BEG;
-  if (Z - GPT < end_unchanged)
-    end_unchanged = Z - GPT;
+  if (GPT - BEG < BEG_UNCHANGED)
+    BEG_UNCHANGED = GPT - BEG;
+  if (Z - GPT < END_UNCHANGED)
+    END_UNCHANGED = Z - GPT;
  
    for (;;)
      {
@@ -4400,6 +4472,7 @@ code_convert_region (from, from_byte, to, to_byte, coding, encodep, replace)
        if (! encodep && result == CODING_FINISH_INCONSISTENT_EOL)
         {
           unsigned char *pend = dst, *p = pend - inserted_byte;
+         Lisp_Object eol_type;
  
           /* Encode LFs back to the original eol format (CR or CRLF).  */
           if (coding->eol_type == CODING_EOL_CR)
@@ -4413,7 +4486,7 @@ code_convert_region (from, from_byte, to, to_byte, coding, encodep, replace)
               while (p < pend) if (*p++ == '\n') count++;
               if (src - dst < count)
                 {
-                 /* We don't have sufficient room for putting LFs
+                 /* We don't have sufficient room for encoding LFs
                      back to CRLF.  We must record converted and
                      not-yet-converted text back to the buffer
                      content, enlarge the gap, then record them out of
@@ -4446,13 +4519,25 @@ code_convert_region (from, from_byte, to, to_byte, coding, encodep, replace)
           /* Suppress eol-format conversion in the further conversion.  */
           coding->eol_type = CODING_EOL_LF;
  
-         /* Restore the original symbol.  */
-         coding->symbol = saved_coding_symbol;
+         /* Set the coding system symbol to that for Unix-like EOL.  */
+         eol_type = Fget (saved_coding_symbol, Qeol_type);
+         if (VECTORP (eol_type)
+             && XVECTOR (eol_type)->size == 3
+             && SYMBOLP (XVECTOR (eol_type)->contents[CODING_EOL_LF]))
+           coding->symbol = XVECTOR (eol_type)->contents[CODING_EOL_LF];
+         else
+           coding->symbol = saved_coding_symbol;
           
           continue;
         }
        if (len_byte <= 0)
-       break;
+       {
+         if (coding->type != coding_type_ccl
+             || coding->mode & CODING_MODE_LAST_BLOCK)
+           break;
+         coding->mode |= CODING_MODE_LAST_BLOCK;
+         continue;
+       }
        if (result == CODING_FINISH_INSUFFICIENT_SRC)
         {
           /* The source text ends in invalid codes.  Let's just
@@ -4547,7 +4632,7 @@ code_convert_region (from, from_byte, to, to_byte, coding, encodep, replace)
        prev_Z = Z;
        val = call1 (coding->post_read_conversion, make_number (inserted));
        CHECK_NUMBER (val, 0);
-      inserted = Z - prev_Z;
+      inserted += Z - prev_Z;
      }
  
    if (orig_point >= from)
@@ -4791,13 +4876,13 @@ detect_coding_system (src, src_bytes, highest)
  
    /* At first, gather possible coding systems in VAL.  */
    val = Qnil;
-  for (tmp = Vcoding_category_list; !NILP (tmp); tmp = XCONS (tmp)->cdr)
+  for (tmp = Vcoding_category_list; !NILP (tmp); tmp = XCDR (tmp))
      {
        int idx
-       = XFASTINT (Fget (XCONS (tmp)->car, Qcoding_category_index));
+       = XFASTINT (Fget (XCAR (tmp), Qcoding_category_index));
        if (coding_mask & (1 << idx))
         {
-         val = Fcons (Fsymbol_value (XCONS (tmp)->car), val);
+         val = Fcons (Fsymbol_value (XCAR (tmp)), val);
           if (highest)
             break;
         }
@@ -4806,18 +4891,18 @@ detect_coding_system (src, src_bytes, highest)
      val = Fnreverse (val);
  
    /* Then, replace the elements with subsidiary coding systems.  */
-  for (tmp = val; !NILP (tmp); tmp = XCONS (tmp)->cdr)
+  for (tmp = val; !NILP (tmp); tmp = XCDR (tmp))
      {
        if (eol_type != CODING_EOL_UNDECIDED
           && eol_type != CODING_EOL_INCONSISTENT)
         {
           Lisp_Object eol;
-         eol = Fget (XCONS (tmp)->car, Qeol_type);
+         eol = Fget (XCAR (tmp), Qeol_type);
           if (VECTORP (eol))
-           XCONS (tmp)->car = XVECTOR (eol)->contents[eol_type];
+           XCAR (tmp) = XVECTOR (eol)->contents[eol_type];
         }
      }
-  return (highest ? XCONS (val)->car : val);
+  return (highest ? XCAR (val) : val);
  }  
  
  DEFUN ("detect-coding-region", Fdetect_coding_region, Sdetect_coding_region,
@@ -5006,8 +5091,7 @@ code_convert_string_norecord (string, coding_system, encodep)
  }
  \f
  DEFUN ("decode-sjis-char", Fdecode_sjis_char, Sdecode_sjis_char, 1, 1, 0,
-  "Decode a JISX0208 character of shift-jis encoding.\n\
-CODE is the character code in SJIS.\n\
+  "Decode a Japanese character which has CODE in shift_jis encoding.\n\
  Return the corresponding character.")
    (code)
       Lisp_Object code;
@@ -5017,14 +5101,30 @@ Return the corresponding character.")
  
    CHECK_NUMBER (code, 0);
    s1 = (XFASTINT (code)) >> 8, s2 = (XFASTINT (code)) & 0xFF;
-  DECODE_SJIS (s1, s2, c1, c2);
-  XSETFASTINT (val, MAKE_NON_ASCII_CHAR (charset_jisx0208, c1, c2));
+  if (s1 == 0)
+    {
+      if (s2 < 0x80)
+       XSETFASTINT (val, s2);
+      else if (s2 >= 0xA0 || s2 <= 0xDF)
+       XSETFASTINT (val,
+                    MAKE_NON_ASCII_CHAR (charset_katakana_jisx0201, s2, 0));
+      else
+       error ("Invalid Shift JIS code: %x", XFASTINT (code));
+    }
+  else
+    {
+      if ((s1 < 0x80 || s1 > 0x9F && s1 < 0xE0 || s1 > 0xEF)
+         || (s2 < 0x40 || s2 == 0x7F || s2 > 0xFC))
+       error ("Invalid Shift JIS code: %x", XFASTINT (code));
+      DECODE_SJIS (s1, s2, c1, c2);
+      XSETFASTINT (val, MAKE_NON_ASCII_CHAR (charset_jisx0208, c1, c2));
+    }
    return val;
  }
  
  DEFUN ("encode-sjis-char", Fencode_sjis_char, Sencode_sjis_char, 1, 1, 0,
-  "Encode a JISX0208 character CHAR to SJIS coding system.\n\
-Return the corresponding character code in SJIS.")
+  "Encode a Japanese character CHAR to shift_jis encoding.\n\
+Return the corresponding code in SJIS.")
    (ch)
       Lisp_Object ch;
  {
@@ -5033,19 +5133,28 @@ Return the corresponding character code in SJIS.")
  
    CHECK_NUMBER (ch, 0);
    SPLIT_CHAR (XFASTINT (ch), charset, c1, c2);
-  if (charset == charset_jisx0208)
+  if (charset == CHARSET_ASCII)
+    {
+      val = ch;
+    }
+  else if (charset == charset_jisx0208
+          && c1 > 0x20 && c1 < 0x7F && c2 > 0x20 && c2 < 0x7F)
      {
        ENCODE_SJIS (c1, c2, s1, s2);
        XSETFASTINT (val, (s1 << 8) | s2);
      }
+  else if (charset == charset_katakana_jisx0201
+          && c1 > 0x20 && c2 < 0xE0)
+    {
+      XSETFASTINT (val, c1 | 0x80);
+    }
    else
-    XSETFASTINT (val, 0);
+    error ("Can't encode to shift_jis: %d", XFASTINT (ch));
    return val;
  }
  
  DEFUN ("decode-big5-char", Fdecode_big5_char, Sdecode_big5_char, 1, 1, 0,
-  "Decode a Big5 character CODE of BIG5 coding system.\n\
-CODE is the character code in BIG5.\n\
+  "Decode a Big5 character which has CODE in BIG5 coding system.\n\
  Return the corresponding character.")
    (code)
       Lisp_Object code;
@@ -5056,8 +5165,20 @@ Return the corresponding character.")
  
    CHECK_NUMBER (code, 0);
    b1 = (XFASTINT (code)) >> 8, b2 = (XFASTINT (code)) & 0xFF;
-  DECODE_BIG5 (b1, b2, charset, c1, c2);
-  XSETFASTINT (val, MAKE_NON_ASCII_CHAR (charset, c1, c2));
+  if (b1 == 0)
+    {
+      if (b2 >= 0x80)
+       error ("Invalid BIG5 code: %x", XFASTINT (code));
+      val = code;
+    }
+  else
+    {
+      if ((b1 < 0xA1 || b1 > 0xFE)
+         || (b2 < 0x40 || (b2 > 0x7E && b2 < 0xA1) || b2 > 0xFE))
+       error ("Invalid BIG5 code: %x", XFASTINT (code));
+      DECODE_BIG5 (b1, b2, charset, c1, c2);
+      XSETFASTINT (val, MAKE_NON_ASCII_CHAR (charset, c1, c2));
+    }
    return val;
  }
  
@@ -5072,13 +5193,20 @@ Return the corresponding character code in Big5.")
  
    CHECK_NUMBER (ch, 0);
    SPLIT_CHAR (XFASTINT (ch), charset, c1, c2);
-  if (charset == charset_big5_1 || charset == charset_big5_2)
+  if (charset == CHARSET_ASCII)
+    {
+      val = ch;
+    }
+  else if ((charset == charset_big5_1
+           && (XFASTINT (ch) >= 0x250a1 && XFASTINT (ch) <= 0x271ec))
+          || (charset == charset_big5_2
+              && XFASTINT (ch) >= 0x290a1 && XFASTINT (ch) <= 0x2bdb2))
      {
        ENCODE_BIG5 (charset, c1, c2, b1, b2);
        XSETFASTINT (val, (b1 << 8) | b2);
      }
    else
-    XSETFASTINT (val, 0);
+    error ("Can't encode to Big5: %d", XFASTINT (ch));
    return val;
  }
  \f
@@ -5196,18 +5324,18 @@ which is a list of all the arguments given to this function.")
    if (NILP (chain))
      return Qnil;
  
-  for (; CONSP (chain); chain = XCONS (chain)->cdr)
+  for (; CONSP (chain); chain = XCDR (chain))
      {
        Lisp_Object elt;
-      elt = XCONS (chain)->car;
+      elt = XCAR (chain);
  
        if (CONSP (elt)
           && ((STRINGP (target)
-              && STRINGP (XCONS (elt)->car)
-              && fast_string_match (XCONS (elt)->car, target) >= 0)
-             || (INTEGERP (target) && EQ (target, XCONS (elt)->car))))
+              && STRINGP (XCAR (elt))
+              && fast_string_match (XCAR (elt), target) >= 0)
+             || (INTEGERP (target) && EQ (target, XCAR (elt)))))
         {
-         val = XCONS (elt)->cdr;
+         val = XCDR (elt);
           /* Here, if VAL is both a valid coding system and a valid
               function symbol, we return VAL as a coding system.  */
           if (CONSP (val))
@@ -5278,13 +5406,13 @@ This function is internal use only.")
  
    while (CONSP (val) && i < CODING_CATEGORY_IDX_MAX)
      {
-      if (! SYMBOLP (XCONS (val)->car))
+      if (! SYMBOLP (XCAR (val)))
         break;
-      idx = XFASTINT (Fget (XCONS (val)->car, Qcoding_category_index));
+      idx = XFASTINT (Fget (XCAR (val), Qcoding_category_index));
        if (idx >= CODING_CATEGORY_IDX_MAX)
         break;
        coding_priorities[i++] = (1 << idx);
-      val = XCONS (val)->cdr;
+      val = XCDR (val);
      }
    /* If coding-category-list is valid and contains all coding
       categories, `i' should be CODING_CATEGORY_IDX_MAX now.  If not,
@@ -5544,11 +5672,16 @@ There are three such tables, `file-coding-system-alist',\n\
  
    DEFVAR_LISP ("coding-system-for-write", &Vcoding_system_for_write,
      "Specify the coding system for write operations.\n\
-It is useful to bind this variable with `let', but do not set it globally.\n\
-If the value is a coding system, it is used for encoding on write operation.\n\
-If not, an appropriate element is used from one of the coding system alists:\n\
+Programs bind this variable with `let', but you should not set it globally.\n\
+If the value is a coding system, it is used for encoding of output,\n\
+when writing it to a file and when sending it to a file or subprocess.\n\
+\n\
+If this does not specify a coding system, an appropriate element\n\
+is used from one of the coding system alists:\n\
  There are three such tables, `file-coding-system-alist',\n\
-`process-coding-system-alist', and `network-coding-system-alist'.");
+`process-coding-system-alist', and `network-coding-system-alist'.\n\
+For output to files, if the above procedure does not specify a coding system,\n\
+the value of `buffer-file-coding-system' is used.");
    Vcoding_system_for_write = Qnil;
  
    DEFVAR_LISP ("last-coding-system-used", &Vlast_coding_system_used,
@@ -5556,7 +5689,9 @@ There are three such tables, `file-coding-system-alist',\n\
    Vlast_coding_system_used = Qnil;
  
    DEFVAR_BOOL ("inhibit-eol-conversion", &inhibit_eol_conversion,
-    "*Non-nil inhibit code conversion of end-of-line format in any cases.");
+    "*Non-nil means always inhibit code conversion of end-of-line format.\n\
+See info node `Coding Systems' and info node `Text and Binary' concerning\n\
+such conversion.");
    inhibit_eol_conversion = 0;
  
    DEFVAR_BOOL ("inherit-process-coding-system", &inherit_process_coding_system,
@@ -5612,21 +5747,21 @@ or a cons of coding systems which are used as above.\n\
  See also the function `find-operation-coding-system'.");
    Vnetwork_coding_system_alist = Qnil;
  
-  DEFVAR_INT ("eol-mnemonic-unix", &eol_mnemonic_unix,
-    "Mnemonic character indicating UNIX-like end-of-line format (i.e. LF) .");
-  eol_mnemonic_unix = ':';
+  DEFVAR_LISP ("eol-mnemonic-unix", &eol_mnemonic_unix,
+    "*String displayed in mode line for UNIX-like (LF) end-of-line format.");
+  eol_mnemonic_unix = build_string (":");
  
-  DEFVAR_INT ("eol-mnemonic-dos", &eol_mnemonic_dos,
-    "Mnemonic character indicating DOS-like end-of-line format (i.e. CRLF).");
-  eol_mnemonic_dos = '\\';
+  DEFVAR_LISP ("eol-mnemonic-dos", &eol_mnemonic_dos,
+    "*String displayed in mode line for DOS-like (CRLF) end-of-line format.");
+  eol_mnemonic_dos = build_string ("\\");
  
-  DEFVAR_INT ("eol-mnemonic-mac", &eol_mnemonic_mac,
-    "Mnemonic character indicating MAC-like end-of-line format (i.e. CR).");
-  eol_mnemonic_mac = '/';
+  DEFVAR_LISP ("eol-mnemonic-mac", &eol_mnemonic_mac,
+    "*String displayed in mode line for MAC-like (CR) end-of-line format.");
+  eol_mnemonic_mac = build_string ("/");
  
-  DEFVAR_INT ("eol-mnemonic-undecided", &eol_mnemonic_undecided,
-    "Mnemonic character indicating end-of-line format is not yet decided.");
-  eol_mnemonic_undecided = ':';
+  DEFVAR_LISP ("eol-mnemonic-undecided", &eol_mnemonic_undecided,
+    "*String displayed in mode line when end-of-line format is not yet determined.");
+  eol_mnemonic_undecided = build_string (":");
  
    DEFVAR_LISP ("enable-character-translation", &Venable_character_translation,
      "*Non-nil enables character translation while encoding and decoding.");