Avoid (most) uses of XCAR/XCDR as lvalues, for flexibility in experimenting
[bpt/emacs.git] / src / ccl.c
index f5f024b..52a4ff3 100644 (file)
--- a/src/ccl.c
+++ b/src/ccl.c
@@ -1,5 +1,6 @@
 /* CCL (Code Conversion Language) interpreter.
    Copyright (C) 1995, 1997 Electrotechnical Laboratory, JAPAN.
+   Copyright (C) 2001 Free Software Foundation, Inc.
    Licensed to the Free Software Foundation.
 
 This file is part of GNU Emacs.
@@ -721,8 +722,29 @@ else
                 multibyte form later.  */                              \
              extra_bytes++;                                            \
          }                                                             \
-       else                                                            \
+       else if (CHAR_VALID_P (ch, 0))                                  \
          dst += CHAR_STRING (ch, dst);                                 \
+       else                                                            \
+         CCL_INVALID_CMD;                                              \
+      }                                                                        \
+    else                                                               \
+      CCL_SUSPEND (CCL_STAT_SUSPEND_BY_DST);                           \
+  } while (0)
+
+/* Encode one character CH to multibyte form and write to the current
+   output buffer.  The output bytes always form a valid multibyte
+   sequence.  */
+#define CCL_WRITE_MULTIBYTE_CHAR(ch)                                   \
+  do {                                                                 \
+    int bytes = CHAR_BYTES (ch);                                       \
+    if (!dst)                                                          \
+      CCL_INVALID_CMD;                                                 \
+    else if (dst + bytes + extra_bytes < (dst_bytes ? dst_end : src))  \
+      {                                                                        \
+       if (CHAR_VALID_P ((ch), 0))                                     \
+         dst += CHAR_STRING ((ch), dst);                               \
+       else                                                            \
+         CCL_INVALID_CMD;                                              \
       }                                                                        \
     else                                                               \
       CCL_SUSPEND (CCL_STAT_SUSPEND_BY_DST);                           \
@@ -839,15 +861,15 @@ ccl_driver (ccl, source, destination, src_bytes, dst_bytes, consumed)
 {
   register int *reg = ccl->reg;
   register int ic = ccl->ic;
-  register int code, field1, field2;
+  register int code = 0, field1, field2;
   register Lisp_Object *ccl_prog = ccl->prog;
   unsigned char *src = source, *src_end = src + src_bytes;
   unsigned char *dst = destination, *dst_end = dst + dst_bytes;
   int jump_address;
-  int i, j, op;
+  int i = 0, j, op;
   int stack_idx = ccl->stack_idx;
   /* Instruction counter of the current CCL code. */
-  int this_ic;
+  int this_ic = 0;
   /* CCL_WRITE_CHAR will produce 8-bit code of range 0x80..0x9F.  But,
      each of them will be converted to multibyte form of 2-byte
      sequence.  For that conversion, we remember how many more bytes
@@ -857,7 +879,7 @@ ccl_driver (ccl, source, destination, src_bytes, dst_bytes, consumed)
   if (ic >= ccl->eof_ic)
     ic = CCL_HEADER_MAIN;
 
-  if (ccl->buf_magnification ==0) /* We can't produce any bytes.  */
+  if (ccl->buf_magnification == 0) /* We can't produce any bytes.  */
     dst = NULL;
 
   /* Set mapping stack pointer. */
@@ -1227,6 +1249,16 @@ ccl_driver (ccl, source, destination, src_bytes, dst_bytes, consumed)
                  goto ccl_read_multibyte_character_suspend;
                }
              
+             if (!ccl->multibyte)
+               {
+                 int bytes;
+                 if (!UNIBYTE_STR_AS_MULTIBYTE_P (src, src_end - src, bytes))
+                   {
+                     reg[RRR] = CHARSET_8_BIT_CONTROL;
+                     reg[rrr] = *src++;
+                     break;
+                   }
+               }
              i = *src++;
              if (i == '\n' && ccl->eol_type != CODING_EOL_LF)
                {
@@ -1253,21 +1285,27 @@ ccl_driver (ccl, source, destination, src_bytes, dst_bytes, consumed)
                  reg[rrr] = i;
                  reg[RRR] = CHARSET_ASCII;
                }
-             else if (i <= MAX_CHARSET_OFFICIAL_DIMENSION1)
-               {
-                 if (src >= src_end)
-                   goto ccl_read_multibyte_character_suspend;
-                 reg[RRR] = i;
-                 reg[rrr] = (*src++ & 0x7F);
-               }
              else if (i <= MAX_CHARSET_OFFICIAL_DIMENSION2)
                {
-                 if ((src + 1) >= src_end)
+                 int dimension = BYTES_BY_CHAR_HEAD (i) - 1;
+
+                 if (dimension == 0)
+                   {
+                     /* `i' is a leading code for an undefined charset.  */
+                     reg[RRR] = CHARSET_8_BIT_GRAPHIC;
+                     reg[rrr] = i;
+                   }
+                 else if (src + dimension > src_end)
                    goto ccl_read_multibyte_character_suspend;
-                 reg[RRR] = i;
-                 i = (*src++ & 0x7F);
-                 reg[rrr] = ((i << 7) | (*src & 0x7F));
-                 src++;
+                 else
+                   {
+                     reg[RRR] = i;
+                     i = (*src++ & 0x7F);
+                     if (dimension == 1)
+                       reg[rrr] = i;
+                     else
+                       reg[rrr] = ((i << 7) | (*src++ & 0x7F));
+                   }
                }
              else if ((i == LEADING_CODE_PRIVATE_11)
                       || (i == LEADING_CODE_PRIVATE_12))
@@ -1308,6 +1346,12 @@ ccl_driver (ccl, source, destination, src_bytes, dst_bytes, consumed)
              break;
 
            ccl_read_multibyte_character_suspend:
+             if (src <= src_end && !ccl->multibyte && ccl->last_block)
+               {
+                 reg[RRR] = CHARSET_8_BIT_CONTROL;
+                 reg[rrr] = i;
+                 break;
+               }
              src--;
              if (ccl->last_block)
                {
@@ -1332,7 +1376,7 @@ ccl_driver (ccl, source, destination, src_bytes, dst_bytes, consumed)
              else
                i = ((i - 0xE0) << 14) | reg[rrr];
 
-             CCL_WRITE_CHAR (i);
+             CCL_WRITE_MULTIBYTE_CHAR (i);
 
              break;
 
@@ -1781,13 +1825,33 @@ ccl_driver (ccl, source, destination, src_bytes, dst_bytes, consumed)
          bcopy (msg, dst, msglen);
          dst += msglen;
        }
+      
+      if (ccl->status == CCL_STAT_INVALID_CMD)
+       {
+#if 0 /* If the remaining bytes contain 0x80..0x9F, copying them
+        results in an invalid multibyte sequence.  */
+
+         /* Copy the remaining source data.  */
+         int i = src_end - src;
+         if (dst_bytes && (dst_end - dst) < i)
+           i = dst_end - dst;
+         bcopy (src, dst, i);
+         src += i;
+         dst += i;
+#else
+         /* Signal that we've consumed everything.  */
+         src = src_end;
+#endif
+       }
     }
 
  ccl_finish:
   ccl->ic = ic;
   ccl->stack_idx = stack_idx;
   ccl->prog = ccl_prog;
-  if (consumed) *consumed = src - source;
+  ccl->eight_bit_control = (extra_bytes > 0);
+  if (consumed)
+    *consumed = src - source;
   return (dst ? dst - destination : 0);
 }
 
@@ -1971,17 +2035,18 @@ DEFUN ("ccl-execute", Fccl_execute, Sccl_execute, 2, 2, 0,
   "Execute CCL-PROGRAM with registers initialized by REGISTERS.\n\
 \n\
 CCL-PROGRAM is a CCL program name (symbol)\n\
-or a compiled code generated by `ccl-compile' (for backward compatibility,\n\
-in this case, the overhead of the execution is bigger than the former case).\n\
+or compiled code generated by `ccl-compile' (for backward compatibility.\n\
+In the latter case, the execution overhead is bigger than in the former).\n\
 No I/O commands should appear in CCL-PROGRAM.\n\
 \n\
 REGISTERS is a vector of [R0 R1 ... R7] where RN is an initial value\n\
- of Nth register.\n\
+for the Nth register.\n\
 \n\
 As side effect, each element of REGISTERS holds the value of\n\
- corresponding register after the execution.\n\
+the corresponding register after the execution.\n\
 \n\
-See the documentation of `define-ccl-program' for the detail of CCL program.")
+See the documentation of `define-ccl-program' for a definition of CCL\n\
+programs.")
   (ccl_prog, reg)
      Lisp_Object ccl_prog, reg;
 {
@@ -2177,11 +2242,11 @@ Return index number of the registered CCL program.")
 
 /* Register code conversion map.
    A code conversion map consists of numbers, Qt, Qnil, and Qlambda.
-   The first element is start code point.
-   The rest elements are mapped numbers.
+   The first element is the start code point.
+   The other elements are mapped numbers.
    Symbol t means to map to an original number before mapping.
    Symbol nil means that the corresponding element is empty.
-   Symbol lambda menas to terminate mapping here.
+   Symbol lambda means to terminate mapping here.
 */
 
 DEFUN ("register-code-conversion-map", Fregister_code_conversion_map,
@@ -2209,7 +2274,7 @@ Return index number of the registered map.")
       if (EQ (symbol, XCAR (slot)))
        {
          index = make_number (i);
-         XCDR (slot) = map;
+         XSETCDR (slot, map);
          Fput (symbol, Qcode_conversion_map, map);
          Fput (symbol, Qcode_conversion_map_id, index);
          return index;