(encode_coding_object): If a pre-write-conversion
[bpt/emacs.git] / src / coding.c
index d618f4d..15130b9 100644 (file)
@@ -1,7 +1,7 @@
 /* Coding system handler (conversion, detection, etc).
    Copyright (C) 1995, 1997, 1998 Electrotechnical Laboratory, JAPAN.
      Licensed to the Free Software Foundation.
-   Copyright (C) 2001, 2002 Free Software Foundation, Inc.
+   Copyright (C) 2001, 2002, 2003, 2004, 2005 Free Software Foundation, Inc.
    Copyright (C) 2003
      National Institute of Advanced Industrial Science and Technology (AIST)
      Registration Number H13PRO009
@@ -20,8 +20,8 @@ GNU General Public License for more details.
 
 You should have received a copy of the GNU General Public License
 along with GNU Emacs; see the file COPYING.  If not, write to
-the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
-Boston, MA 02111-1307, USA.  */
+the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+Boston, MA 02110-1301, USA.  */
 
 /*** TABLE OF CONTENTS ***
 
@@ -157,8 +157,8 @@ detect_coding_XXX (coding, detect_info)
      struct coding_system *coding;
      struct coding_detection_info *detect_info;
 {
-  unsigned char *src = coding->source;
-  unsigned char *src_end = coding->source + coding->src_bytes;
+  const unsigned char *src = coding->source;
+  const unsigned char *src_end = coding->source + coding->src_bytes;
   int multibytep = coding->src_multibyte;
   int consumed_chars = 0;
   int found = 0;
@@ -205,12 +205,12 @@ static void
 decode_coding_XXXX (coding)
      struct coding_system *coding;
 {
-  unsigned char *src = coding->source + coding->consumed;
-  unsigned char *src_end = coding->source + coding->src_bytes;
+  const unsigned char *src = coding->source + coding->consumed;
+  const unsigned char *src_end = coding->source + coding->src_bytes;
   /* SRC_BASE remembers the start position in source in each loop.
      The loop will be exited when there's not enough source code, or
      when there's no room in CHARBUF for a decoded character.  */
-  unsigned char *src_base;
+  const unsigned char *src_base;
   /* A buffer to produce decoded characters.  */
   int *charbuf = coding->charbuf + coding->charbuf_used;
   int *charbuf_end = coding->charbuf + coding->charbuf_size;
@@ -314,9 +314,10 @@ Lisp_Object Qvalid_codes;
 Lisp_Object QCcategory, QCmnemonic, QCdefalut_char;
 Lisp_Object QCdecode_translation_table, QCencode_translation_table;
 Lisp_Object QCpost_read_conversion, QCpre_write_conversion;
+Lisp_Object QCascii_compatible_p;
 
 extern Lisp_Object Qinsert_file_contents, Qwrite_region;
-Lisp_Object Qcall_process, Qcall_process_region, Qprocess_argument;
+Lisp_Object Qcall_process, Qcall_process_region;
 Lisp_Object Qstart_process, Qopen_network_stream;
 Lisp_Object Qtarget_idx;
 
@@ -706,7 +707,8 @@ static struct coding_system coding_categories[coding_category_max];
          c = ((c & 1) << 6) | *src++;                  \
        else                                            \
          {                                             \
-           c = - string_char (--src, &src, NULL);      \
+           src--;                                      \
+           c = - string_char (src, &src, NULL);        \
            record_conversion_result                    \
              (coding, CODING_RESULT_INVALID_SRC);      \
          }                                             \
@@ -724,7 +726,8 @@ static struct coding_system coding_categories[coding_category_max];
          c = ((c & 1) << 6) | *src++;                  \
        else                                            \
          {                                             \
-           c = - string_char (--src, &src, NULL);      \
+           src--;                                      \
+           c = - string_char (src, &src, NULL);        \
            record_conversion_result                    \
              (coding, CODING_RESULT_INVALID_SRC);      \
          }                                             \
@@ -918,6 +921,8 @@ record_conversion_result (struct coding_system *coding,
     case CODING_RESULT_INSUFFICIENT_MEM:
       Vlast_code_conversion_error = Qinsufficient_memory;
       break;
+    default:
+      Vlast_code_conversion_error = intern ("Unknown error");
     }
 }
 
@@ -1418,26 +1423,6 @@ detect_coding_utf_16 (coding, detect_info)
     }
   else if (c1 >= 0 && c2 >= 0)
     {
-      unsigned char b1[256], b2[256];
-      int b1_variants = 1, b2_variants = 1;
-      int n;
-
-      bzero (b1, 256), bzero (b2, 256);
-      b1[c1]++, b2[c2]++;
-      for (n = 0; n < 256 && src < src_end; n++)
-       {
-         src_base = src;
-         ONE_MORE_BYTE (c1);
-         ONE_MORE_BYTE (c2);
-         if (c1 < 0 || c2 < 0)
-           break;
-         if (! b1[c1++]) b1_variants++;
-         if (! b2[c2++]) b2_variants++;
-       }
-      if (b1_variants < b2_variants)
-       detect_info->found |= CATEGORY_MASK_UTF_16_BE_NOSIG;
-      else
-       detect_info->found |= CATEGORY_MASK_UTF_16_LE_NOSIG;      
       detect_info->rejected
        |= (CATEGORY_MASK_UTF_16_BE | CATEGORY_MASK_UTF_16_LE);
     }
@@ -3819,7 +3804,7 @@ encode_coding_iso_2022 (coding)
              /* Not yet implemented.  */
              break;
            case CODING_ANNOTATE_CHARSET_MASK:
-             preferred_charset_id = charbuf[3];
+             preferred_charset_id = charbuf[2];
              if (preferred_charset_id >= 0
                  && NILP (Fmemq (make_number (preferred_charset_id),
                                  charset_list)))
@@ -4934,7 +4919,7 @@ setup_coding_system (coding_system, coding)
   Lisp_Object val;
 
   if (NILP (coding_system))
-    coding_system = Qno_conversion;
+    coding_system = Qundecided;
 
   CHECK_CODING_SYSTEM_GET_ID (coding_system, coding->id);
 
@@ -5105,6 +5090,30 @@ setup_coding_system (coding_system, coding)
   return;
 }
 
+/* Return a list of charsets supported by CODING.  */
+
+Lisp_Object
+coding_charset_list (coding)
+     struct coding_system *coding;
+{
+  Lisp_Object attrs, charset_list;
+
+  CODING_GET_INFO (coding, attrs, charset_list);
+  if (EQ (CODING_ATTR_TYPE (attrs), Qiso_2022))
+    {
+      int flags = XINT (AREF (attrs, coding_attr_iso_flags));
+
+      if (flags & CODING_ISO_FLAG_FULL_SUPPORT)
+       charset_list = Viso_2022_charset_list;
+    }
+  else if (EQ (CODING_ATTR_TYPE (attrs), Qemacs_mule))
+    {
+      charset_list = Vemacs_mule_charset_list;
+    }
+  return charset_list;
+}
+
+
 /* Return raw-text or one of its subsidiaries that has the same
    eol_type as CODING-SYSTEM.  */
 
@@ -5421,53 +5430,78 @@ detect_coding (coding)
   if (EQ (CODING_ATTR_TYPE (CODING_ID_ATTRS (coding->id)), Qundecided))
     {
       int c, i;
+      struct coding_detection_info detect_info;
 
+      detect_info.checked = detect_info.found = detect_info.rejected = 0;
       for (i = 0, src = coding->source; src < src_end; i++, src++)
        {
          c = *src;
-         if (c & 0x80 || (c < 0x20 && (c == ISO_CODE_ESC
-                                       || c == ISO_CODE_SI
-                                       || c == ISO_CODE_SO)))
+         if (c & 0x80)
            break;
+         if (c < 0x20
+             && (c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO)
+             && ! inhibit_iso_escape_detection
+             && ! detect_info.checked)
+           {
+             coding->head_ascii = src - (coding->source + coding->consumed);
+             if (detect_coding_iso_2022 (coding, &detect_info))
+               {
+                 /* We have scanned the whole data.  */
+                 if (! (detect_info.rejected & CATEGORY_MASK_ISO_7_ELSE))
+                   /* We didn't find an 8-bit code.  */
+                   src = src_end;
+                 break;
+               }
+           }
        }
       coding->head_ascii = src - (coding->source + coding->consumed);
 
-      if (coding->head_ascii < coding->src_bytes)
+      if (coding->head_ascii < coding->src_bytes
+         || detect_info.found)
        {
-         struct coding_detection_info detect_info;
          enum coding_category category;
          struct coding_system *this;
 
-         detect_info.checked = detect_info.found = detect_info.rejected = 0;
-         for (i = 0; i < coding_category_raw_text; i++)
-           {
-             category = coding_priorities[i];
-             this = coding_categories + category;
-             if (this->id < 0)
-               {
-                 /* No coding system of this category is defined.  */
-                 detect_info.rejected |= (1 << category);
-               }
-             else if (category >= coding_category_raw_text)
-               continue;
-             else if (detect_info.checked & (1 << category))
-               {
-                 if (detect_info.found & (1 << category))
-                   break;
-               }
-             else if ((*(this->detector)) (coding, &detect_info)
-                      && detect_info.found & (1 << category))
-               {
-                 if (category == coding_category_utf_16_auto)
-                   {
-                     if (detect_info.found & CATEGORY_MASK_UTF_16_LE)
-                       category = coding_category_utf_16_le;
-                     else
-                       category = coding_category_utf_16_be;
-                   }
+         if (coding->head_ascii == coding->src_bytes)
+           /* As all bytes are 7-bit, we can ignore non-ISO-2022 codings.  */
+           for (i = 0; i < coding_category_raw_text; i++)
+             {
+               category = coding_priorities[i];
+               this = coding_categories + category;
+               if (detect_info.found & (1 << category))
                  break;
-               }
-           }
+             }
+         else
+           for (i = 0; i < coding_category_raw_text; i++)
+             {
+               category = coding_priorities[i];
+               this = coding_categories + category;
+               if (this->id < 0)
+                 {
+                   /* No coding system of this category is defined.  */
+                   detect_info.rejected |= (1 << category);
+                 }
+               else if (category >= coding_category_raw_text)
+                 continue;
+               else if (detect_info.checked & (1 << category))
+                 {
+                   if (detect_info.found & (1 << category))
+                     break;
+                 }
+               else if ((*(this->detector)) (coding, &detect_info)
+                        && detect_info.found & (1 << category))
+                 {
+                   if (category == coding_category_utf_16_auto)
+                     {
+                       if (detect_info.found & CATEGORY_MASK_UTF_16_LE)
+                         category = coding_category_utf_16_le;
+                       else
+                         category = coding_category_utf_16_be;
+                     }
+                   break;
+                 }
+             }
+         
          if (i < coding_category_raw_text)
            setup_coding_system (CODING_ID_NAME (this->id), coding);
          else if (detect_info.rejected == CATEGORY_MASK_ANY)
@@ -6783,8 +6817,8 @@ decode_coding_object (coding, src_object, from, from_byte, to, to_byte,
 
       TEMP_SET_PT_BOTH (coding->dst_pos, coding->dst_pos_byte);
       GCPRO2 (coding->src_object, coding->dst_object);
-      val = call1 (CODING_ATTR_POST_READ (attrs),
-                  make_number (coding->produced_char));
+      val = safe_call1 (CODING_ATTR_POST_READ (attrs),
+                       make_number (coding->produced_char));
       UNGCPRO;
       CHECK_NATNUM (val);
       coding->produced_char += Z - prev_Z;
@@ -6853,6 +6887,7 @@ encode_coding_object (coding, src_object, from, from_byte, to, to_byte,
   Lisp_Object attrs;
   Lisp_Object buffer;
   int saved_pt = -1, saved_pt_byte;
+  int kill_src_buffer = 0;
 
   buffer = Fcurrent_buffer ();
 
@@ -6882,8 +6917,16 @@ encode_coding_object (coding, src_object, from, from_byte, to, to_byte,
          set_buffer_internal (XBUFFER (coding->src_object));
        }
 
-      call2 (CODING_ATTR_PRE_WRITE (attrs),
-            make_number (BEG), make_number (Z));
+      {
+       Lisp_Object args[3];
+
+       args[0] = CODING_ATTR_PRE_WRITE (attrs);
+       args[1] = make_number (BEG);
+       args[2] = make_number (Z);
+       safe_call (3, args);
+      }
+      if (XBUFFER (coding->src_object) != current_buffer)
+       kill_src_buffer = 1;
       coding->src_object = Fcurrent_buffer ();
       if (BEG != GPT)
        move_gap_both (BEG, BEG_BYTE);
@@ -6986,6 +7029,8 @@ encode_coding_object (coding, src_object, from, from_byte, to, to_byte,
                          saved_pt_byte + (coding->produced - bytes));
     }
 
+  if (kill_src_buffer)
+    Fkill_buffer (coding->src_object);
   unbind_to (count, Qnil);
 }
 
@@ -7120,49 +7165,73 @@ detect_coding_system (src, src_chars, src_bytes, highest, multibytep,
       for (i = 0; src < src_end; i++, src++)
        {
          c = *src;
-         if (c & 0x80 || (c < 0x20 && (c == ISO_CODE_ESC
-                                       || c == ISO_CODE_SI
-                                       || c == ISO_CODE_SO)))
+         if (c & 0x80)
            break;
+         if (c < 0x20
+             && (c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO)
+             && inhibit_iso_escape_detection)
+           {
+             coding.head_ascii = src - coding.source;
+             if (detect_coding_iso_2022 (&coding, &detect_info))
+               {
+                 /* We have scanned the whole data.  */
+                 if (! (detect_info.rejected & CATEGORY_MASK_ISO_7_ELSE))
+                   /* We didn't find an 8-bit code.  */
+                   src = src_end;
+                 break;
+               }
+           }
        }
       coding.head_ascii = src - coding.source;
 
-      if (src < src_end)
-       for (i = 0; i < coding_category_raw_text; i++)
-         {
-           category = coding_priorities[i];
-           this = coding_categories + category;
-
-           if (this->id < 0)
-             {
-               /* No coding system of this category is defined.  */
-               detect_info.rejected |= (1 << category);
-             }
-           else if (category >= coding_category_raw_text)
-             continue;
-           else if (detect_info.checked & (1 << category))
+      if (src < src_end
+         || detect_info.found)
+       {
+         if (src == src_end)
+           /* As all bytes are 7-bit, we can ignore non-ISO-2022 codings.  */
+           for (i = 0; i < coding_category_raw_text; i++)
              {
-               if (highest
-                   && (detect_info.found & (1 << category)))
+               category = coding_priorities[i];
+               if (detect_info.found & (1 << category))
                  break;
              }
-           else
+         else
+           for (i = 0; i < coding_category_raw_text; i++)
              {
-               if ((*(this->detector)) (&coding, &detect_info)
-                   && highest
-                   && (detect_info.found & (1 << category)))
+               category = coding_priorities[i];
+               this = coding_categories + category;
+
+               if (this->id < 0)
                  {
-                   if (category == coding_category_utf_16_auto)
+                   /* No coding system of this category is defined.  */
+                   detect_info.rejected |= (1 << category);
+                 }
+               else if (category >= coding_category_raw_text)
+                 continue;
+               else if (detect_info.checked & (1 << category))
+                 {
+                   if (highest
+                       && (detect_info.found & (1 << category)))
+                     break;
+                 }
+               else
+                 {
+                   if ((*(this->detector)) (&coding, &detect_info)
+                       && highest
+                       && (detect_info.found & (1 << category)))
                      {
-                       if (detect_info.found & CATEGORY_MASK_UTF_16_LE)
-                         category = coding_category_utf_16_le;
-                       else
-                         category = coding_category_utf_16_be;
+                       if (category == coding_category_utf_16_auto)
+                         {
+                           if (detect_info.found & CATEGORY_MASK_UTF_16_LE)
+                             category = coding_category_utf_16_le;
+                           else
+                             category = coding_category_utf_16_be;
+                         }
+                       break;
                      }
-                   break;
                  }
              }
-         }
+       }
 
       if (detect_info.rejected == CATEGORY_MASK_ANY)
        {
@@ -8126,7 +8195,11 @@ DEFUN ("terminal-coding-system",
        doc: /* Return coding system specified for terminal output.  */)
      ()
 {
-  return CODING_ID_NAME (terminal_coding.id);
+  Lisp_Object coding_system;
+
+  coding_system = CODING_ID_NAME (terminal_coding.id);
+  /* For backward compatibility, return nil if it is `undecided'. */
+  return (coding_system != Qundecided ? coding_system : Qnil);
 }
 
 DEFUN ("set-keyboard-coding-system-internal",
@@ -8808,6 +8881,10 @@ usage: (define-coding-system-internal ...)  */)
   CODING_ATTR_PLIST (attrs)
     = Fcons (QCcategory, Fcons (AREF (Vcoding_category_table, category),
                                CODING_ATTR_PLIST (attrs)));
+  CODING_ATTR_PLIST (attrs)
+    = Fcons (QCascii_compatible_p, 
+            Fcons (CODING_ATTR_ASCII_COMPAT (attrs),
+                   CODING_ATTR_PLIST (attrs)));
 
   eol_type = args[coding_arg_eol_type];
   if (! NILP (eol_type)
@@ -8909,6 +8986,10 @@ DEFUN ("coding-system-put", Fcoding_system_put, Scoding_system_put,
       CHECK_SYMBOL (val);
       CODING_ATTR_PRE_WRITE (attrs) = val;
     }
+  else if (EQ (prop, QCascii_compatible_p))
+    {
+      CODING_ATTR_ASCII_COMPAT (attrs) = val;
+    }
 
   CODING_ATTR_PLIST (attrs)
     = Fplist_put (CODING_ATTR_PLIST (attrs), prop, val);
@@ -9184,6 +9265,7 @@ syms_of_coding ()
   DEFSYM (QCencode_translation_table, ":encode-translation-table");
   DEFSYM (QCpost_read_conversion, ":post-read-conversion");
   DEFSYM (QCpre_write_conversion, ":pre-write-conversion");
+  DEFSYM (QCascii_compatible_p, ":ascii-compatible-p");
 
   Vcoding_category_table
     = Fmake_vector (make_number (coding_category_max), Qnil);
@@ -9292,7 +9374,9 @@ updated by the functions `make-coding-system' and
 On detecting a coding system, Emacs tries code detection algorithms
 associated with each coding-category one by one in this order.  When
 one algorithm agrees with a byte sequence of source text, the coding
-system bound to the corresponding coding-category is selected.  */);
+system bound to the corresponding coding-category is selected.
+
+Don't modify this variable directly, but use `set-coding-priority'.  */);
   {
     int i;
 
@@ -9565,10 +9649,23 @@ character.");
     plist[15] = args[coding_arg_eol_type] = Qunix;
     args[coding_arg_plist] = Flist (16, plist);
     Fdefine_coding_system_internal (coding_arg_max, args);
+
+    plist[1] = args[coding_arg_name] = Qundecided;
+    plist[3] = args[coding_arg_mnemonic] = make_number ('-');
+    plist[5] = args[coding_arg_coding_type] = Qundecided;
+    /* This is already set.
+       plist[7] = args[coding_arg_ascii_compatible_p] = Qt; */
+    plist[8] = intern (":charset-list");
+    plist[9] = args[coding_arg_charset_list] = Fcons (Qascii, Qnil);
+    plist[11] = args[coding_arg_for_unibyte] = Qnil;
+    plist[13] = build_string ("No conversion on encoding, automatic conversion on decoding.");
+    plist[15] = args[coding_arg_eol_type] = Qnil;
+    args[coding_arg_plist] = Flist (16, plist);
+    Fdefine_coding_system_internal (coding_arg_max, args);
   }
 
   setup_coding_system (Qno_conversion, &keyboard_coding);
-  setup_coding_system (Qno_conversion, &terminal_coding);
+  setup_coding_system (Qundecided, &terminal_coding);
   setup_coding_system (Qno_conversion, &safe_terminal_coding);
 
   {