X-Git-Url: https://git.hcoop.net/bpt/emacs.git/blobdiff_plain/7d0393cf12f2e50ee50e1a1fd73a60e9ef1f56ac..ab5796a9f97180707734a81320e3eb81937281fe:/src/coding.c

diff --git a/src/coding.c b/src/coding.c
index 3fa383ad7b..6d36cc397c 100644
--- a/src/coding.c
+++ b/src/coding.c
@@ -1,7 +1,7 @@
 /* Coding system handler (conversion, detection, and etc).
-   Copyright (C) 1995, 1997, 1998, 2002 Electrotechnical Laboratory, JAPAN.
+   Copyright (C) 1995,97,1998,2002,2003  Electrotechnical Laboratory, JAPAN.
    Licensed to the Free Software Foundation.
-   Copyright (C) 2001 Free Software Foundation, Inc.
+   Copyright (C) 2001,2002,2003  Free Software Foundation, Inc.
 
 This file is part of GNU Emacs.
 
@@ -345,6 +345,7 @@ encode_coding_XXX (coding, source, destination, src_bytes, dst_bytes)
 #include "ccl.h"
 #include "coding.h"
 #include "window.h"
+#include "intervals.h"
 
 #else  /* not emacs */
 
@@ -367,6 +368,8 @@ Lisp_Object Qtarget_idx;
 
 Lisp_Object Vselect_safe_coding_system_function;
 
+int coding_system_require_warning;
+
 /* Mnemonic string for each format of end-of-line.  */
 Lisp_Object eol_mnemonic_unix, eol_mnemonic_dos, eol_mnemonic_mac;
 /* Mnemonic string to indicate format of end-of-line is not yet
@@ -379,6 +382,16 @@ int system_eol_type;
 
 #ifdef emacs
 
+/* Information about which coding system is safe for which chars.
+   The value has the form (GENERIC-LIST . NON-GENERIC-ALIST).
+
+   GENERIC-LIST is a list of generic coding systems which can encode
+   any characters.
+
+   NON-GENERIC-ALIST is an alist of non generic coding systems vs the
+   corresponding char table that contains safe chars.  */
+Lisp_Object Vcoding_system_safe_chars;
+
 Lisp_Object Vcoding_system_list, Vcoding_system_alist;
 
 Lisp_Object Qcoding_system_p, Qcoding_system_error;
@@ -387,6 +400,8 @@ Lisp_Object Qcoding_system_p, Qcoding_system_error;
    end-of-line format.  */
 Lisp_Object Qemacs_mule, Qraw_text;
 
+Lisp_Object Qutf_8;
+
 /* Coding-systems are handed between Emacs Lisp programs and C internal
    routines by the following three variables.  */
 /* Coding-system for reading files and receiving data from process.  */
@@ -485,26 +500,27 @@ Lisp_Object Vcharset_revision_alist;
 /* Default coding systems used for process I/O.  */
 Lisp_Object Vdefault_process_coding_system;
 
+/* Char table for translating Quail and self-inserting input.  */
+Lisp_Object Vtranslation_table_for_input;
+
 /* Global flag to tell that we can't call post-read-conversion and
    pre-write-conversion functions.  Usually the value is zero, but it
    is set to 1 temporarily while such functions are running.  This is
    to avoid infinite recursive call.  */
 static int inhibit_pre_post_conversion;
 
-/* Char-table containing safe coding systems of each character.  */
-Lisp_Object Vchar_coding_system_table;
 Lisp_Object Qchar_coding_system;
 
-/* Return `safe-chars' property of coding system CODING.  Don't check
-   validity of CODING.  */
+/* Return `safe-chars' property of CODING_SYSTEM (symbol).  Don't check
+   its validity.  */
 
 Lisp_Object
-coding_safe_chars (coding)
-     struct coding_system *coding;
+coding_safe_chars (coding_system)
+     Lisp_Object coding_system;
 {
   Lisp_Object coding_spec, plist, safe_chars;
 
-  coding_spec = Fget (coding->symbol, Qcoding_system);
+  coding_spec = Fget (coding_system, Qcoding_system);
   plist = XVECTOR (coding_spec)->contents[3];
   safe_chars = Fplist_get (XVECTOR (coding_spec)->contents[3], Qsafe_chars);
   return (CHAR_TABLE_P (safe_chars) ? safe_chars : Qt);
@@ -673,8 +689,16 @@ detect_coding_emacs_mule (src, src_end, multibytep)
 
 /* Record one COMPONENT (alternate character or composition rule).  */
 
-#define CODING_ADD_COMPOSITION_COMPONENT(coding, component)	\
-  (coding->cmp_data->data[coding->cmp_data->used++] = component)
+#define CODING_ADD_COMPOSITION_COMPONENT(coding, component)		\
+  do {									\
+    coding->cmp_data->data[coding->cmp_data->used++] = component;	\
+    if (coding->cmp_data->used - coding->cmp_data_start			\
+	== COMPOSITION_DATA_MAX_BUNCH_LENGTH)				\
+      {									\
+	CODING_ADD_COMPOSITION_END (coding, coding->produced_char);	\
+	coding->composing = COMPOSITION_NO;				\
+      }									\
+  } while (0)
 
 
 /* Get one byte from a data pointed by SRC and increment SRC.  If SRC
@@ -691,7 +715,7 @@ detect_coding_emacs_mule (src, src_end, multibytep)
 #define DECODE_EMACS_MULE_COMPOSITION_CHAR(c, p)		\
   do {								\
     int bytes;							\
-    								\
+								\
     c = SAFE_ONE_MORE_BYTE ();					\
     if (c < 0)							\
       break;							\
@@ -722,7 +746,10 @@ detect_coding_emacs_mule (src, src_end, multibytep)
 	      break;						\
 	    *p++ = c;						\
 	  }							\
-	if (UNIBYTE_STR_AS_MULTIBYTE_P (p0, p - p0, bytes))	\
+	if (UNIBYTE_STR_AS_MULTIBYTE_P (p0, p - p0, bytes)	\
+	    || (coding->flags /* We are recovering a file.  */	\
+		&& p0[0] == LEADING_CODE_8_BIT_CONTROL		\
+		&& ! CHAR_HEAD_P (p0[1])))			\
 	  c = STRING_CHAR (p0, bytes);				\
 	else							\
 	  c = -1;						\
@@ -826,7 +853,10 @@ decode_composition_emacs_mule (coding, src, src_end,
 	  else
 	    {
 	      int bytes;
-	      if (UNIBYTE_STR_AS_MULTIBYTE_P (src, src_end - src, bytes))
+	      if (UNIBYTE_STR_AS_MULTIBYTE_P (src, src_end - src, bytes)
+		  || (coding->flags /* We are recovering a file.  */
+		      && src[0] == LEADING_CODE_8_BIT_CONTROL
+		      && ! CHAR_HEAD_P (src[1])))
 		c = STRING_CHAR (src, bytes);
 	      else
 		c = *src, bytes = 1;
@@ -942,11 +972,6 @@ decode_coding_emacs_mule (coding, source, destination, src_bytes, dst_bytes)
 	      ONE_MORE_BYTE (c);
 	      if (c != '\n')
 		{
-		  if (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
-		    {
-		      coding->result = CODING_FINISH_INCONSISTENT_EOL;
-		      goto label_end_of_loop;
-		    }
 		  src--;
 		  c = '\r';
 		}
@@ -985,7 +1010,10 @@ decode_coding_emacs_mule (coding, source, destination, src_bytes, dst_bytes)
 	  p = tmp;
 	  src++;
 	}
-      else if (UNIBYTE_STR_AS_MULTIBYTE_P (src, src_end - src, bytes))
+      else if (UNIBYTE_STR_AS_MULTIBYTE_P (src, src_end - src, bytes)
+	       || (coding->flags /* We are recovering a file.  */
+		   && src[0] == LEADING_CODE_8_BIT_CONTROL
+		   && ! CHAR_HEAD_P (src[1])))
 	{
 	  p = src;
 	  src += bytes;
@@ -1116,7 +1144,22 @@ encode_coding_emacs_mule (coding, source, destination, src_bytes, dst_bytes)
 	    EMIT_ONE_BYTE ('\r');
 	}
       else if (SINGLE_BYTE_CHAR_P (c))
-	EMIT_ONE_BYTE (c);
+	{
+	  if (coding->flags && ! ASCII_BYTE_P (c))
+	    {
+	      /* As we are auto saving, retain the multibyte form for
+		 8-bit chars.  */
+	      unsigned char buf[MAX_MULTIBYTE_LENGTH];
+	      int bytes = CHAR_STRING (c, buf);
+
+	      if (bytes == 1)
+		EMIT_ONE_BYTE (buf[0]);
+	      else
+		EMIT_TWO_BYTES (buf[0], buf[1]);
+	    }
+	  else
+	    EMIT_ONE_BYTE (c);
+	}
       else
 	EMIT_BYTES (src_base, src);
       coding->consumed_char++;
@@ -1310,7 +1353,7 @@ enum iso_code_class_type iso_code_class[256];
 #define CHARSET_OK(idx, charset, c)					\
   (coding_system_table[idx]						\
    && (charset == CHARSET_ASCII						\
-       || (safe_chars = coding_safe_chars (coding_system_table[idx]),	\
+       || (safe_chars = coding_safe_chars (coding_system_table[idx]->symbol), \
 	   CODING_SAFE_CHAR_P (safe_chars, c)))				\
    && (CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding_system_table[idx],	\
 					      charset)			\
@@ -1319,6 +1362,9 @@ enum iso_code_class_type iso_code_class[256];
 #define SHIFT_OUT_OK(idx) \
   (CODING_SPEC_ISO_INITIAL_DESIGNATION (coding_system_table[idx], 1) >= 0)
 
+#define COMPOSITION_OK(idx)	\
+  (coding_system_table[idx]->composing != COMPOSITION_DISABLED)
+
 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
    Check if a text is encoded in ISO2022.  If it is, return an
    integer in which appropriate flag bits any of:
@@ -1396,7 +1442,30 @@ detect_coding_iso2022 (src, src_end, multibytep)
 	  else if (c >= '0' && c <= '4')
 	    {
 	      /* ESC <Fp> for start/end composition.  */
-	      mask_found |= CODING_CATEGORY_MASK_ISO;
+	      if (COMPOSITION_OK (CODING_CATEGORY_IDX_ISO_7))
+		mask_found |= CODING_CATEGORY_MASK_ISO_7;
+	      else
+		mask &= ~CODING_CATEGORY_MASK_ISO_7;
+	      if (COMPOSITION_OK (CODING_CATEGORY_IDX_ISO_7_TIGHT))
+		mask_found |= CODING_CATEGORY_MASK_ISO_7_TIGHT;
+	      else
+		mask &= ~CODING_CATEGORY_MASK_ISO_7_TIGHT;
+	      if (COMPOSITION_OK (CODING_CATEGORY_IDX_ISO_8_1))
+		mask_found |= CODING_CATEGORY_MASK_ISO_8_1;
+	      else
+		mask &= ~CODING_CATEGORY_MASK_ISO_8_1;
+	      if (COMPOSITION_OK (CODING_CATEGORY_IDX_ISO_8_2))
+		mask_found |= CODING_CATEGORY_MASK_ISO_8_2;
+	      else
+		mask &= ~CODING_CATEGORY_MASK_ISO_8_2;
+	      if (COMPOSITION_OK (CODING_CATEGORY_IDX_ISO_7_ELSE))
+		mask_found |= CODING_CATEGORY_MASK_ISO_7_ELSE;
+	      else
+		mask &= ~CODING_CATEGORY_MASK_ISO_7_ELSE;
+	      if (COMPOSITION_OK (CODING_CATEGORY_IDX_ISO_8_ELSE))
+		mask_found |= CODING_CATEGORY_MASK_ISO_8_ELSE;
+	      else
+		mask &= ~CODING_CATEGORY_MASK_ISO_8_ELSE;
 	      break;
 	    }
 	  else
@@ -1739,7 +1808,7 @@ decode_coding_iso2022 (coding, source, destination, src_bytes, dst_bytes)
   Lisp_Object translation_table;
   Lisp_Object safe_chars;
 
-  safe_chars = coding_safe_chars (coding);
+  safe_chars = coding_safe_chars (coding->symbol);
 
   if (NILP (Venable_character_translation))
     translation_table = Qnil;
@@ -1754,7 +1823,7 @@ decode_coding_iso2022 (coding, source, destination, src_bytes, dst_bytes)
 
   while (1)
     {
-      int c1, c2;
+      int c1, c2 = 0;
 
       src_base = src;
       ONE_MORE_BYTE (c1);
@@ -1830,11 +1899,6 @@ decode_coding_iso2022 (coding, source, destination, src_bytes, dst_bytes)
 	      ONE_MORE_BYTE (c1);
 	      if (c1 != ISO_CODE_LF)
 		{
-		  if (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
-		    {
-		      coding->result = CODING_FINISH_INCONSISTENT_EOL;
-		      goto label_end_of_loop;
-		    }
 		  src--;
 		  c1 = '\r';
 		}
@@ -1999,6 +2063,78 @@ decode_coding_iso2022 (coding, source, destination, src_bytes, dst_bytes)
 		}
 	      continue;
 
+	    case '%':
+	      if (COMPOSING_P (coding))
+		DECODE_COMPOSITION_END ('1');
+	      ONE_MORE_BYTE (c1);
+	      if (c1 == '/')
+		{
+		  /* CTEXT extended segment:
+		     ESC % / [0-4] M L --ENCODING-NAME-- \002 --BYTES--
+		     We keep these bytes as is for the moment.
+		     They may be decoded by post-read-conversion.  */
+		  int dim, M, L;
+		  int size, required;
+		  int produced_chars;
+		  
+		  ONE_MORE_BYTE (dim);
+		  ONE_MORE_BYTE (M);
+		  ONE_MORE_BYTE (L);
+		  size = ((M - 128) * 128) + (L - 128);
+		  required = 8 + size * 2;
+		  if (dst + required > (dst_bytes ? dst_end : src))
+		    goto label_end_of_loop;
+		  *dst++ = ISO_CODE_ESC;
+		  *dst++ = '%';
+		  *dst++ = '/';
+		  *dst++ = dim;
+		  produced_chars = 4;
+		  dst += CHAR_STRING (M, dst), produced_chars++;
+		  dst += CHAR_STRING (L, dst), produced_chars++;
+		  while (size-- > 0)
+		    {
+		      ONE_MORE_BYTE (c1);
+		      dst += CHAR_STRING (c1, dst), produced_chars++;
+		    }
+		  coding->produced_char += produced_chars;
+		}
+	      else if (c1 == 'G')
+		{
+		  unsigned char *d = dst;
+		  int produced_chars;
+
+		  /* XFree86 extension for embedding UTF-8 in CTEXT:
+		     ESC % G --UTF-8-BYTES-- ESC % @
+		     We keep these bytes as is for the moment.
+		     They may be decoded by post-read-conversion.  */
+		  if (d + 6 > (dst_bytes ? dst_end : src))
+		    goto label_end_of_loop;
+		  *d++ = ISO_CODE_ESC;
+		  *d++ = '%';
+		  *d++ = 'G';
+		  produced_chars = 3;
+		  while (d + 1 < (dst_bytes ? dst_end : src))
+		    {
+		      ONE_MORE_BYTE (c1);
+		      if (c1 == ISO_CODE_ESC
+			  && src + 1 < src_end
+			  && src[0] == '%'
+			  && src[1] == '@')
+			break;
+		      d += CHAR_STRING (c1, d), produced_chars++;
+		    }
+		  if (d + 3 > (dst_bytes ? dst_end : src))
+		    goto label_end_of_loop;
+		  *d++ = ISO_CODE_ESC;
+		  *d++ = '%';
+		  *d++ = '@';
+		  dst = d;
+		  coding->produced_char += produced_chars + 3;
+		}
+	      else
+		goto label_invalid_code;
+	      continue;
+
 	    default:
 	      if (! (coding->flags & CODING_FLAG_ISO_DESIGNATION))
 		goto label_invalid_code;
@@ -2264,11 +2400,11 @@ decode_coding_iso2022 (coding, source, destination, src_bytes, dst_bytes)
 
 /* Instead of encoding character C, produce one or two `?'s.  */
 
-#define ENCODE_UNSAFE_CHARACTER(c)					\
-  do {									\
-    ENCODE_ISO_CHARACTER (CODING_INHIBIT_CHARACTER_SUBSTITUTION);	\
-    if (CHARSET_WIDTH (CHAR_CHARSET (c)) > 1)				\
-      ENCODE_ISO_CHARACTER (CODING_INHIBIT_CHARACTER_SUBSTITUTION);	\
+#define ENCODE_UNSAFE_CHARACTER(c)				\
+  do {								\
+    ENCODE_ISO_CHARACTER (CODING_REPLACEMENT_CHARACTER);	\
+    if (CHARSET_WIDTH (CHAR_CHARSET (c)) > 1)			\
+      ENCODE_ISO_CHARACTER (CODING_REPLACEMENT_CHARACTER);	\
   } while (0)
 
 
@@ -2497,7 +2633,10 @@ encode_coding_iso2022 (coding, source, destination, src_bytes, dst_bytes)
   Lisp_Object translation_table;
   Lisp_Object safe_chars;
 
-  safe_chars = coding_safe_chars (coding);
+  if (coding->flags & CODING_FLAG_ISO_SAFE)
+    coding->mode |= CODING_MODE_INHIBIT_UNENCODABLE_CHAR;
+
+  safe_chars = coding_safe_chars (coding->symbol);
 
   if (NILP (Venable_character_translation))
     translation_table = Qnil;
@@ -2564,7 +2703,7 @@ encode_coding_iso2022 (coding, source, destination, src_bytes, dst_bytes)
 		    }
 		  else
 		    {
-		      if (coding->flags & CODING_FLAG_ISO_SAFE
+		      if (coding->mode & CODING_MODE_INHIBIT_UNENCODABLE_CHAR
 			  && ! CODING_SAFE_CHAR_P (safe_chars, c))
 			ENCODE_UNSAFE_CHARACTER (c);
 		      else
@@ -2633,7 +2772,7 @@ encode_coding_iso2022 (coding, source, destination, src_bytes, dst_bytes)
 	  *dst++ = c;
 	  coding->errors++;
 	}
-      else if (coding->flags & CODING_FLAG_ISO_SAFE
+      else if (coding->mode & CODING_MODE_INHIBIT_UNENCODABLE_CHAR
 	       && ! CODING_SAFE_CHAR_P (safe_chars, c))
 	ENCODE_UNSAFE_CHARACTER (c);
       else
@@ -2860,7 +2999,7 @@ detect_coding_utf_16 (src, src_end, multibytep)
      int multibytep;
 {
   unsigned char c1, c2;
-  /* Dummy for TWO_MORE_BYTES.  */
+  /* Dummy for ONE_MORE_BYTE_CHECK_MULTIBYTE.  */
   struct coding_system dummy_coding;
   struct coding_system *coding = &dummy_coding;
 
@@ -2911,7 +3050,7 @@ decode_coding_sjis_big5 (coding, source, destination,
   coding->produced_char = 0;
   while (1)
     {
-      int c, charset, c1, c2;
+      int c, charset, c1, c2 = 0;
 
       src_base = src;
       ONE_MORE_BYTE (c1);
@@ -2928,12 +3067,6 @@ decode_coding_sjis_big5 (coding, source, destination,
 		      ONE_MORE_BYTE (c2);
 		      if (c2 == '\n')
 			c1 = c2;
-		      else if (coding->mode
-			       & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
-			{
-			  coding->result = CODING_FINISH_INCONSISTENT_EOL;
-			  goto label_end_of_loop;
-			}
 		      else
 			/* To process C2 again, SRC is subtracted by 1.  */
 			src--;
@@ -3082,6 +3215,12 @@ encode_coding_sjis_big5 (coding, source, destination,
 		EMIT_ONE_BYTE (c1 | 0x80);
 	      else if (charset == charset_latin_jisx0201)
 		EMIT_ONE_BYTE (c1);
+	      else if (coding->mode & CODING_MODE_INHIBIT_UNENCODABLE_CHAR)
+		{
+		  EMIT_ONE_BYTE (CODING_REPLACEMENT_CHARACTER);
+		  if (CHARSET_WIDTH (charset) > 1)
+		    EMIT_ONE_BYTE (CODING_REPLACEMENT_CHARACTER);
+		}
 	      else
 		/* There's no way other than producing the internal
 		   codes as is.  */
@@ -3094,6 +3233,12 @@ encode_coding_sjis_big5 (coding, source, destination,
 		  ENCODE_BIG5 (charset, c1, c2, c1, c2);
 		  EMIT_TWO_BYTES (c1, c2);
 		}
+	      else if (coding->mode & CODING_MODE_INHIBIT_UNENCODABLE_CHAR)
+		{
+		  EMIT_ONE_BYTE (CODING_REPLACEMENT_CHARACTER);
+		  if (CHARSET_WIDTH (charset) > 1)
+		    EMIT_ONE_BYTE (CODING_REPLACEMENT_CHARACTER);
+		}
 	      else
 		/* There's no way other than producing the internal
 		   codes as is.  */
@@ -3179,11 +3324,6 @@ decode_eol (coding, source, destination, src_bytes, dst_bytes)
 	      ONE_MORE_BYTE (c);
 	      if (c != '\n')
 		{
-		  if (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
-		    {
-		      coding->result = CODING_FINISH_INCONSISTENT_EOL;
-		      goto label_end_of_loop;
-		    }
 		  src--;
 		  c = '\r';
 		}
@@ -4366,7 +4506,11 @@ encoding_buffer_size (coding, src_bytes)
   int magnification;
 
   if (coding->type == coding_type_ccl)
-    magnification = coding->spec.ccl.encoder.buf_magnification;
+    {
+      magnification = coding->spec.ccl.encoder.buf_magnification;
+      if (coding->eol_type == CODING_EOL_CRLF)
+	magnification *= 2;
+    }
   else if (CODING_REQUIRE_ENCODING (coding))
     magnification = 3;
   else
@@ -4451,7 +4595,10 @@ ccl_coding_driver (coding, source, destination, src_bytes, dst_bytes, encodep)
       if (ccl->eol_type ==CODING_EOL_UNDECIDED)
 	ccl->eol_type = CODING_EOL_LF;
       ccl->cr_consumed = coding->spec.ccl.cr_carryover;
+      ccl->eight_bit_control = coding->dst_multibyte;
     }
+  else
+    ccl->eight_bit_control = 1;
   ccl->multibyte = coding->src_multibyte;
   if (coding->spec.ccl.eight_bit_carryover[0] != 0)
     {
@@ -5174,10 +5321,11 @@ static int shrink_conversion_region_threshhold = 1024;
   } while (0)
 
 static Lisp_Object
-code_convert_region_unwind (dummy)
-     Lisp_Object dummy;
+code_convert_region_unwind (arg)
+     Lisp_Object arg;
 {
   inhibit_pre_post_conversion = 0;
+  Vlast_coding_system_used = arg;
   return Qnil;
 }
 
@@ -5295,6 +5443,9 @@ coding_restore_composition (coding, obj)
 	      int len = data[0] - 4, j;
 	      Lisp_Object args[MAX_COMPOSITION_COMPONENTS * 2 - 1];
 
+	      if (method == COMPOSITION_WITH_RULE_ALTCHARS
+		  && len % 2 == 0)
+		len --;
 	      for (j = 0; j < len; j++)
 		args[j] = make_number (data[4 + j]);
 	      components = (method == COMPOSITION_WITH_ALTCHARS
@@ -5418,7 +5569,8 @@ code_convert_region (from, from_byte, to, to_byte, coding, encodep, replace)
       struct buffer *prev = current_buffer;
       Lisp_Object new;
 
-      record_unwind_protect (code_convert_region_unwind, Qnil);
+      record_unwind_protect (code_convert_region_unwind,
+			     Vlast_coding_system_used);
       /* We should not call any more pre-write/post-read-conversion
          functions while this pre-write-conversion is running.  */
       inhibit_pre_post_conversion = 1;
@@ -5776,16 +5928,22 @@ code_convert_region (from, from_byte, to, to_byte, coding, encodep, replace)
       && ! encodep && ! NILP (coding->post_read_conversion))
     {
       Lisp_Object val;
+      Lisp_Object saved_coding_system;
 
       if (from != PT)
 	TEMP_SET_PT_BOTH (from, from_byte);
       prev_Z = Z;
-      record_unwind_protect (code_convert_region_unwind, Qnil);
+      record_unwind_protect (code_convert_region_unwind,
+			     Vlast_coding_system_used);
+      saved_coding_system = Vlast_coding_system_used;
+      Vlast_coding_system_used = coding->symbol;
       /* We should not call any more pre-write/post-read-conversion
          functions while this post-read-conversion is running.  */
       inhibit_pre_post_conversion = 1;
       val = call1 (coding->post_read_conversion, make_number (inserted));
       inhibit_pre_post_conversion = 0;
+      coding->symbol = Vlast_coding_system_used;
+      Vlast_coding_system_used = saved_coding_system;
       /* Discard the unwind protect.  */
       specpdl_ptr--;
       CHECK_NUMBER (val);
@@ -5831,7 +5989,8 @@ run_pre_post_conversion_on_str (str, coding, encodep)
   Lisp_Object old_deactivate_mark;
 
   record_unwind_protect (Fset_buffer, Fcurrent_buffer ());
-  record_unwind_protect (code_convert_region_unwind, Qnil);
+  record_unwind_protect (code_convert_region_unwind,
+			 Vlast_coding_system_used);
   /* It is not crucial to specbind this.  */
   old_deactivate_mark = Vdeactivate_mark;
   GCPRO2 (str, old_deactivate_mark);
@@ -5839,12 +5998,13 @@ run_pre_post_conversion_on_str (str, coding, encodep)
   buffer = Fget_buffer_create (build_string (" *code-converting-work*"));
   buf = XBUFFER (buffer);
 
+  delete_all_overlays (buf);
   buf->directory = current_buffer->directory;
   buf->read_only = Qnil;
   buf->filename = Qnil;
   buf->undo_list = Qt;
-  buf->overlays_before = Qnil;
-  buf->overlays_after = Qnil;
+  eassert (buf->overlays_before == NULL);
+  eassert (buf->overlays_after == NULL);
 
   set_buffer_internal (buf);
   /* We must insert the contents of STR as is without
@@ -5861,8 +6021,10 @@ run_pre_post_conversion_on_str (str, coding, encodep)
     call2 (coding->pre_write_conversion, make_number (BEG), make_number (Z));
   else
     {
+      Vlast_coding_system_used = coding->symbol;
       TEMP_SET_PT_BOTH (BEG, BEG_BYTE);
       call1 (coding->post_read_conversion, make_number (Z - BEG));
+      coding->symbol = Vlast_coding_system_used;
     }
   inhibit_pre_post_conversion = 0;
   Vdeactivate_mark = old_deactivate_mark;
@@ -5946,7 +6108,9 @@ decode_coding_string (str, coding, nocopy)
       shrinked_bytes = from + (SBYTES (str) - to_byte);
     }
 
-  if (!require_decoding)
+  if (!require_decoding
+      && !(SYMBOLP (coding->post_read_conversion)
+	   && !NILP (Ffboundp (coding->post_read_conversion))))
     {
       coding->consumed = SBYTES (str);
       coding->consumed_char = SCHARS (str);
@@ -6283,8 +6447,11 @@ detect_coding_system (src, src_bytes, highest, multibytep)
 
 DEFUN ("detect-coding-region", Fdetect_coding_region, Sdetect_coding_region,
        2, 3, 0,
-       doc: /* Detect coding system of the text in the region between START and END.
-Return a list of possible coding systems ordered by priority.
+       doc: /* Detect how the byte sequence in the region is encoded.
+Return a list of possible coding systems used on decoding a byte
+sequence containing the bytes in the region between START and END when
+the coding system `undecided' is specified.  The list is ordered by
+priority decided in the current language environment.
 
 If only ASCII characters are found, it returns a list of single element
 `undecided' or its subsidiary coding system according to a detected
@@ -6327,8 +6494,11 @@ highest priority.  */)
 
 DEFUN ("detect-coding-string", Fdetect_coding_string, Sdetect_coding_string,
        1, 2, 0,
-       doc: /* Detect coding system of the text in STRING.
-Return a list of possible coding systems ordered by priority.
+       doc: /* Detect how the byte sequence in STRING is encoded.
+Return a list of possible coding systems used on decoding a byte
+sequence containing the bytes in STRING when the coding system
+`undecided' is specified.  The list is ordered by priority decided in
+the current language environment.
 
 If only ASCII characters are found, it returns a list of single element
 `undecided' or its subsidiary coding system according to a detected
@@ -6351,30 +6521,10 @@ highest priority.  */)
 			       STRING_MULTIBYTE (string));
 }
 
-/* Return an intersection of lists L1 and L2.  */
-
-static Lisp_Object
-intersection (l1, l2)
-     Lisp_Object l1, l2;
-{
-  Lisp_Object val = Fcons (Qnil, Qnil), tail;
-
-  for (tail = val; CONSP (l1); l1 = XCDR (l1))
-    {
-      if (!NILP (Fmemq (XCAR (l1), l2)))
-	{
-	  XSETCDR (tail, Fcons (XCAR (l1), Qnil));
-	  tail = XCDR (tail);
-	}
-    }
-  return XCDR (val);
-}
-
-
 /*  Subroutine for Fsafe_coding_systems_region_internal.
 
     Return a list of coding systems that safely encode the multibyte
-    text between P and PEND.  SAFE_CODINGS, if non-nil, is a list of
+    text between P and PEND.  SAFE_CODINGS, if non-nil, is an alist of
     possible coding systems.  If it is nil, it means that we have not
     yet found any coding systems.
 
@@ -6390,8 +6540,9 @@ find_safe_codings (p, pend, safe_codings, work_table, single_byte_char_found)
      Lisp_Object safe_codings, work_table;
      int *single_byte_char_found;
 {
-  int c, len, idx;
-  Lisp_Object val;
+  int c, len;
+  Lisp_Object val, ch;
+  Lisp_Object prev, tail;
 
   while (p < pend)
     {
@@ -6403,30 +6554,96 @@ find_safe_codings (p, pend, safe_codings, work_table, single_byte_char_found)
       if (SINGLE_BYTE_CHAR_P (c))
 	*single_byte_char_found = 1;
       if (NILP (safe_codings))
+	/* Already all coding systems are excluded.  But, we can't
+	   terminate the loop here because non-ASCII single-byte char
+	   must be found.  */
 	continue;
       /* Check the safe coding systems for C.  */
-      val = char_table_ref_and_index (work_table, c, &idx);
+      ch = make_number (c);
+      val = Faref (work_table, ch);
       if (EQ (val, Qt))
 	/* This element was already checked.  Ignore it.  */
 	continue;
       /* Remember that we checked this element.  */
-      CHAR_TABLE_SET (work_table, make_number (idx), Qt);
+      Faset (work_table, ch, Qt);
 
-      /* If there are some safe coding systems for C and we have
-	 already found the other set of coding systems for the
-	 different characters, get the intersection of them.  */
-      if (!EQ (safe_codings, Qt) && !NILP (val))
-	val = intersection (safe_codings, val);
-      safe_codings = val;
+      for (prev = tail = safe_codings; CONSP (tail); tail = XCDR (tail))
+	{
+	  Lisp_Object elt, translation_table, hash_table, accept_latin_extra;
+	  int encodable;
+
+	  elt = XCAR (tail);
+	  if (CONSP (XCDR (elt)))
+	    {
+	      /* This entry has this format now:
+		 ( CODING SAFE-CHARS TRANSLATION-TABLE HASH-TABLE
+		          ACCEPT-LATIN-EXTRA ) */
+	      val = XCDR (elt);
+	      encodable = ! NILP (Faref (XCAR (val), ch));
+	      if (! encodable)
+		{
+		  val = XCDR (val);
+		  translation_table = XCAR (val);
+		  hash_table = XCAR (XCDR (val));
+		  accept_latin_extra = XCAR (XCDR (XCDR (val)));
+		}
+	    }
+	  else
+	    {
+	      /* This entry has this format now: ( CODING . SAFE-CHARS) */
+	      encodable = ! NILP (Faref (XCDR (elt), ch));
+	      if (! encodable)
+		{
+		  /* Transform the format to:
+		     ( CODING SAFE-CHARS TRANSLATION-TABLE HASH-TABLE
+		       ACCEPT-LATIN-EXTRA )  */
+		  val = Fget (XCAR (elt), Qcoding_system);
+		  translation_table
+		    = Fplist_get (AREF (val, 3),
+				  Qtranslation_table_for_encode);
+		  if (SYMBOLP (translation_table))
+		    translation_table = Fget (translation_table,
+					      Qtranslation_table);
+		  hash_table
+		    = (CHAR_TABLE_P (translation_table)
+		       ? XCHAR_TABLE (translation_table)->extras[1]
+		       : Qnil);
+		  accept_latin_extra
+		    = ((EQ (AREF (val, 0), make_number (2))
+			&& VECTORP (AREF (val, 4)))
+		       ? AREF (AREF (val, 4), 16)
+		       : Qnil);
+		  XSETCAR (tail, list5 (XCAR (elt), XCDR (elt),
+					translation_table, hash_table,
+					accept_latin_extra));
+		}
+	    }
+	      
+	  if (! encodable
+	      && ((CHAR_TABLE_P (translation_table)
+		   && ! NILP (Faref (translation_table, ch)))
+		  || (HASH_TABLE_P (hash_table)
+		      && ! NILP (Fgethash (ch, hash_table, Qnil)))
+		  || (SINGLE_BYTE_CHAR_P (c)
+		      && ! NILP (accept_latin_extra)
+		      && VECTORP (Vlatin_extra_code_table)
+		      && ! NILP (AREF (Vlatin_extra_code_table, c)))))
+	    encodable = 1;
+	  if (encodable)
+	    prev = tail;
+	  else
+	    {
+	      /* Exclude this coding system from SAFE_CODINGS.  */
+	      if (EQ (tail, safe_codings))
+		safe_codings = XCDR (safe_codings);
+	      else
+		XSETCDR (prev, XCDR (tail));
+	    }
+	}
     }
   return safe_codings;
 }
 
-
-/* Return a list of coding systems that safely encode the text between
-   START and END.  If the text contains only ASCII or is unibyte,
-   return t.  */
-
 DEFUN ("find-coding-systems-region-internal",
        Ffind_coding_systems_region_internal,
        Sfind_coding_systems_region_internal, 2, 2, 0,
@@ -6485,28 +6702,35 @@ DEFUN ("find-coding-systems-region-internal",
     }
 
   /* The text contains non-ASCII characters.  */
-  work_table = Fcopy_sequence (Vchar_coding_system_table);
-  safe_codings = find_safe_codings (p1, p1end, Qt, work_table,
+
+  work_table = Fmake_char_table (Qchar_coding_system, Qnil);
+  safe_codings = Fcopy_sequence (XCDR (Vcoding_system_safe_chars));
+
+  safe_codings = find_safe_codings (p1, p1end, safe_codings, work_table,
 				    &single_byte_char_found);
   if (p2 < p2end)
     safe_codings = find_safe_codings (p2, p2end, safe_codings, work_table,
 				      &single_byte_char_found);
-
-  if (EQ (safe_codings, Qt))
-    ; /* Nothing to be done.  */
-  else if (!single_byte_char_found)
+  if (EQ (safe_codings, XCDR (Vcoding_system_safe_chars)))
+    safe_codings = Qt;
+  else
     {
-      /* Append generic coding systems.  */
-      Lisp_Object args[2];
-      args[0] = safe_codings;
-      args[1] = Fchar_table_extra_slot (Vchar_coding_system_table,
-					make_number (0));
-      safe_codings = Fappend (2, args);
+      /* Turn safe_codings to a list of coding systems... */
+      Lisp_Object val;
+
+      if (single_byte_char_found)
+	/* ... and append these for eight-bit chars.  */
+	val = Fcons (Qraw_text,
+		     Fcons (Qemacs_mule, Fcons (Qno_conversion, Qnil)));
+      else
+	/* ... and append generic coding systems.  */
+	val = Fcopy_sequence (XCAR (Vcoding_system_safe_chars));
+
+      for (; CONSP (safe_codings); safe_codings = XCDR (safe_codings))
+	val = Fcons (XCAR (XCAR (safe_codings)), val);
+      safe_codings = val;
     }
-  else
-    safe_codings = Fcons (Qraw_text,
-			  Fcons (Qemacs_mule,
-				 Fcons (Qno_conversion, safe_codings)));
+
   return safe_codings;
 }
 
@@ -6587,7 +6811,10 @@ to the string.  */)
       if (NILP (current_buffer->enable_multibyte_characters))
 	return Qnil;
       p = CHAR_POS_ADDR (from);
-      pend = CHAR_POS_ADDR (to);
+      if (to == GPT)
+	pend = GPT_ADDR;
+      else
+	pend = CHAR_POS_ADDR (to);
     }
   else
     {
@@ -6622,7 +6849,7 @@ to the string.  */)
   if (coding.type == coding_type_undecided)
     safe_chars = Qnil;
   else
-    safe_chars = coding_safe_chars (&coding);
+    safe_chars = coding_safe_chars (coding_system);
 
   if (STRINGP (string)
       || from >= GPT || to <= GPT)
@@ -6909,8 +7136,7 @@ Return the corresponding character code in Big5.  */)
   return val;
 }
 
-DEFUN ("set-terminal-coding-system-internal",
-       Fset_terminal_coding_system_internal,
+DEFUN ("set-terminal-coding-system-internal", Fset_terminal_coding_system_internal,
        Sset_terminal_coding_system_internal, 1, 1, 0,
        doc: /* Internal use only.  */)
      (coding_system)
@@ -6919,7 +7145,7 @@ DEFUN ("set-terminal-coding-system-internal",
   CHECK_SYMBOL (coding_system);
   setup_coding_system (Fcheck_coding_system (coding_system), &terminal_coding);
   /* We had better not send unsafe characters to terminal.  */
-  terminal_coding.flags |= CODING_FLAG_ISO_SAFE;
+  terminal_coding.mode |= CODING_MODE_INHIBIT_UNENCODABLE_CHAR;
   /* Character composition should be disabled.  */
   terminal_coding.composing = COMPOSITION_DISABLED;
   /* Error notification should be suppressed.  */
@@ -6929,8 +7155,7 @@ DEFUN ("set-terminal-coding-system-internal",
   return Qnil;
 }
 
-DEFUN ("set-safe-terminal-coding-system-internal",
-       Fset_safe_terminal_coding_system_internal,
+DEFUN ("set-safe-terminal-coding-system-internal", Fset_safe_terminal_coding_system_internal,
        Sset_safe_terminal_coding_system_internal, 1, 1, 0,
        doc: /* Internal use only.  */)
      (coding_system)
@@ -6948,16 +7173,15 @@ DEFUN ("set-safe-terminal-coding-system-internal",
   return Qnil;
 }
 
-DEFUN ("terminal-coding-system",
-       Fterminal_coding_system, Sterminal_coding_system, 0, 0, 0,
+DEFUN ("terminal-coding-system", Fterminal_coding_system,
+       Sterminal_coding_system, 0, 0, 0,
        doc: /* Return coding system specified for terminal output.  */)
      ()
 {
   return terminal_coding.symbol;
 }
 
-DEFUN ("set-keyboard-coding-system-internal",
-       Fset_keyboard_coding_system_internal,
+DEFUN ("set-keyboard-coding-system-internal", Fset_keyboard_coding_system_internal,
        Sset_keyboard_coding_system_internal, 1, 1, 0,
        doc: /* Internal use only.  */)
      (coding_system)
@@ -6970,8 +7194,8 @@ DEFUN ("set-keyboard-coding-system-internal",
   return Qnil;
 }
 
-DEFUN ("keyboard-coding-system",
-       Fkeyboard_coding_system, Skeyboard_coding_system, 0, 0, 0,
+DEFUN ("keyboard-coding-system", Fkeyboard_coding_system,
+       Skeyboard_coding_system, 0, 0, 0,
        doc: /* Return coding system specified for decoding keyboard input.  */)
      ()
 {
@@ -7143,6 +7367,40 @@ This function is internal use only.  */)
   return Qnil;
 }
 
+DEFUN ("define-coding-system-internal", Fdefine_coding_system_internal,
+       Sdefine_coding_system_internal, 1, 1, 0,
+       doc: /* Register CODING-SYSTEM as a base coding system.
+This function is internal use only.  */)
+     (coding_system)
+     Lisp_Object coding_system;
+{
+  Lisp_Object safe_chars, slot;
+
+  if (NILP (Fcheck_coding_system (coding_system)))
+    Fsignal (Qcoding_system_error, Fcons (coding_system, Qnil));
+  safe_chars = coding_safe_chars (coding_system);
+  if (! EQ (safe_chars, Qt) && ! CHAR_TABLE_P (safe_chars))
+    error ("No valid safe-chars property for %s",
+	   SDATA (SYMBOL_NAME (coding_system)));
+  if (EQ (safe_chars, Qt))
+    {
+      if (NILP (Fmemq (coding_system, XCAR (Vcoding_system_safe_chars))))
+	XSETCAR (Vcoding_system_safe_chars,
+		 Fcons (coding_system, XCAR (Vcoding_system_safe_chars)));
+    }
+  else
+    {
+      slot = Fassq (coding_system, XCDR (Vcoding_system_safe_chars));
+      if (NILP (slot))
+	XSETCDR (Vcoding_system_safe_chars,
+		 nconc2 (XCDR (Vcoding_system_safe_chars),
+			 Fcons (Fcons (coding_system, safe_chars), Qnil)));
+      else
+	XSETCDR (slot, safe_chars);
+    }
+  return Qnil;
+}
+
 #endif /* emacs */
 
 
@@ -7296,9 +7554,12 @@ syms_of_coding ()
       }
   }
 
+  Vcoding_system_safe_chars = Fcons (Qnil, Qnil);
+  staticpro (&Vcoding_system_safe_chars);
+
   Qtranslation_table = intern ("translation-table");
   staticpro (&Qtranslation_table);
-  Fput (Qtranslation_table, Qchar_table_extra_slots, make_number (1));
+  Fput (Qtranslation_table, Qchar_table_extra_slots, make_number (2));
 
   Qtranslation_table_id = intern ("translation-table-id");
   staticpro (&Qtranslation_table_id);
@@ -7320,7 +7581,7 @@ syms_of_coding ()
      But don't staticpro it here--that is done in alloc.c.  */
   Qchar_table_extra_slots = intern ("char-table-extra-slots");
   Fput (Qsafe_chars, Qchar_table_extra_slots, make_number (0));
-  Fput (Qchar_coding_system, Qchar_table_extra_slots, make_number (2));
+  Fput (Qchar_coding_system, Qchar_table_extra_slots, make_number (0));
 
   Qvalid_codes = intern ("valid-codes");
   staticpro (&Qvalid_codes);
@@ -7331,6 +7592,9 @@ syms_of_coding ()
   Qraw_text = intern ("raw-text");
   staticpro (&Qraw_text);
 
+  Qutf_8 = intern ("utf-8");
+  staticpro (&Qutf_8);
+
   defsubr (&Scoding_system_p);
   defsubr (&Sread_coding_system);
   defsubr (&Sread_non_nil_coding_system);
@@ -7355,6 +7619,7 @@ syms_of_coding ()
   defsubr (&Sfind_operation_coding_system);
   defsubr (&Supdate_coding_systems_internal);
   defsubr (&Sset_coding_priority_internal);
+  defsubr (&Sdefine_coding_system_internal);
 
   DEFVAR_LISP ("coding-system-list", &Vcoding_system_list,
 	       doc: /* List of coding systems.
@@ -7415,7 +7680,9 @@ the value of `buffer-file-coding-system' is used.  */);
   Vcoding_system_for_write = Qnil;
 
   DEFVAR_LISP ("last-coding-system-used", &Vlast_coding_system_used,
-	       doc: /* Coding system used in the latest file or process I/O.  */);
+	       doc: /* Coding system used in the latest file or process I/O.
+Also set by `encode-coding-region', `decode-coding-region',
+`encode-coding-string' and `decode-coding-string'.  */);
   Vlast_coding_system_used = Qnil;
 
   DEFVAR_BOOL ("inhibit-eol-conversion", &inhibit_eol_conversion,
@@ -7549,11 +7816,14 @@ coding system used in each operation can't encode the text.
 The default value is `select-safe-coding-system' (which see).  */);
   Vselect_safe_coding_system_function = Qnil;
 
-  DEFVAR_LISP ("char-coding-system-table", &Vchar_coding_system_table,
-	       doc: /* Char-table containing safe coding systems of each characters.
-Each element doesn't include such generic coding systems that can
-encode any characters.  They are in the first extra slot.  */);
-  Vchar_coding_system_table = Fmake_char_table (Qchar_coding_system, Qnil);
+  DEFVAR_BOOL ("coding-system-require-warning",
+	       &coding_system_require_warning,
+	       doc: /* Internal use only.
+If non-nil, on writing a file, `select-safe-coding-system-function' is
+called even if `coding-system-for-write' is non-nil.  The command
+`universal-coding-system-argument' binds this variable to t temporarily.  */);
+  coding_system_require_warning = 0;
+
 
   DEFVAR_BOOL ("inhibit-iso-escape-detection",
 	       &inhibit_iso_escape_detection,
@@ -7581,6 +7851,12 @@ The other way to read escape sequences in a file without decoding is
 to explicitly specify some coding system that doesn't use ISO2022's
 escape sequence (e.g `latin-1') on reading by \\[universal-coding-system-argument].  */);
   inhibit_iso_escape_detection = 0;
+
+  DEFVAR_LISP ("translation-table-for-input", &Vtranslation_table_for_input,
+	       doc: /* Char table for translating self-inserting characters.
+This is applied to the result of input methods, not their input.  See also
+`keyboard-translate-table'.  */);
+    Vtranslation_table_for_input = Qnil;
 }
 
 char *
@@ -7605,3 +7881,5 @@ emacs_strerror (error_number)
 
 #endif /* emacs */
 
+/* arch-tag: 3a3a2b01-5ff6-4071-9afe-f5b808d9229d
+   (do not change this comment) */