Avoid (most) uses of XCAR/XCDR as lvalues, for flexibility in experimenting
[bpt/emacs.git] / src / ccl.c
index 2be97ba..52a4ff3 100644 (file)
--- a/src/ccl.c
+++ b/src/ccl.c
@@ -1,5 +1,6 @@
 /* CCL (Code Conversion Language) interpreter.
    Copyright (C) 1995, 1997 Electrotechnical Laboratory, JAPAN.
+   Copyright (C) 2001 Free Software Foundation, Inc.
    Licensed to the Free Software Foundation.
 
 This file is part of GNU Emacs.
@@ -420,7 +421,7 @@ Lisp_Object Vccl_program_table;
                                          IC += 2;
                                        */
 
-#define CCL_Extention          0x1F /* Extended CCL code
+#define CCL_Extension          0x1F /* Extended CCL code
                                        1:ExtendedCOMMNDRrrRRRrrrXXXXX
                                        2:ARGUEMENT
                                        3:...
@@ -614,7 +615,8 @@ static int stack_idx_of_map_multiple;
   } while (0)
 
 #define CCL_CALL_FOR_MAP_INSTRUCTION(symbol, ret_ic)           \
-  do {                                                         \
+if (1)                                                         \
+  {                                                            \
     struct ccl_program called_ccl;                             \
     if (stack_idx >= 256                                       \
        || (setup_ccl_program (&called_ccl, (symbol)) != 0))    \
@@ -632,7 +634,8 @@ static int stack_idx_of_map_multiple;
     ccl_prog = called_ccl.prog;                                        \
     ic = CCL_HEADER_MAIN;                                      \
     goto ccl_repeat;                                           \
-  } while (0)
+  }                                                            \
+else
 
 #define CCL_MapSingle          0x12 /* Map by single code conversion map
                                        1:ExtendedCOMMNDXXXRRRrrrXXXXX
@@ -672,29 +675,35 @@ static int stack_idx_of_map_multiple;
                                   r[7] = LOWER_BYTE (SJIS (Y, Z) */
 
 /* Terminate CCL program successfully.  */
-#define CCL_SUCCESS                    \
-  do {                                 \
+#define CCL_SUCCESS                    \
+if (1)                                 \
+  {                                    \
     ccl->status = CCL_STAT_SUCCESS;    \
-    goto ccl_finish;                   \
-  } while (0)
+    goto ccl_finish;                   \
+  }                                    \
+else
 
 /* Suspend CCL program because of reading from empty input buffer or
    writing to full output buffer.  When this program is resumed, the
    same I/O command is executed.  */
 #define CCL_SUSPEND(stat)      \
-  do {                         \
+if (1)                         \
+  {                            \
     ic--;                      \
     ccl->status = stat;                \
     goto ccl_finish;           \
-  } while (0)
+  }                            \
+else
 
 /* Terminate CCL program because of invalid command.  Should not occur
    in the normal case.  */
 #define CCL_INVALID_CMD                        \
-  do {                                 \
+if (1)                                 \
+  {                                    \
     ccl->status = CCL_STAT_INVALID_CMD;        \
     goto ccl_error_handler;            \
-  } while (0)
+  }                                    \
+else
 
 /* Encode one character CH to multibyte form and write to the current
    output buffer.  If CH is less than 256, CH is written as is.  */
@@ -713,8 +722,29 @@ static int stack_idx_of_map_multiple;
                 multibyte form later.  */                              \
              extra_bytes++;                                            \
          }                                                             \
-       else                                                            \
+       else if (CHAR_VALID_P (ch, 0))                                  \
          dst += CHAR_STRING (ch, dst);                                 \
+       else                                                            \
+         CCL_INVALID_CMD;                                              \
+      }                                                                        \
+    else                                                               \
+      CCL_SUSPEND (CCL_STAT_SUSPEND_BY_DST);                           \
+  } while (0)
+
+/* Encode one character CH to multibyte form and write to the current
+   output buffer.  The output bytes always form a valid multibyte
+   sequence.  */
+#define CCL_WRITE_MULTIBYTE_CHAR(ch)                                   \
+  do {                                                                 \
+    int bytes = CHAR_BYTES (ch);                                       \
+    if (!dst)                                                          \
+      CCL_INVALID_CMD;                                                 \
+    else if (dst + bytes + extra_bytes < (dst_bytes ? dst_end : src))  \
+      {                                                                        \
+       if (CHAR_VALID_P ((ch), 0))                                     \
+         dst += CHAR_STRING ((ch), dst);                               \
+       else                                                            \
+         CCL_INVALID_CMD;                                              \
       }                                                                        \
     else                                                               \
       CCL_SUSPEND (CCL_STAT_SUSPEND_BY_DST);                           \
@@ -734,15 +764,15 @@ static int stack_idx_of_map_multiple;
       CCL_SUSPEND (CCL_STAT_SUSPEND_BY_DST);           \
   } while (0)
 
-/* Read one byte from the current input buffer into Rth register.  */
-#define CCL_READ_CHAR(r)                               \
+/* Read one byte from the current input buffer into register REG.  */
+#define CCL_READ_CHAR(REG)                             \
   do {                                                 \
     if (!src)                                          \
       CCL_INVALID_CMD;                                 \
     else if (src < src_end)                            \
       {                                                        \
-       r = *src++;                                     \
-       if (r == '\n'                                   \
+       REG = *src++;                                   \
+       if (REG == '\n'                                 \
            && ccl->eol_type != CODING_EOL_LF)          \
          {                                             \
            /* We are encoding.  */                     \
@@ -753,16 +783,16 @@ static int stack_idx_of_map_multiple;
                else                                    \
                  {                                     \
                    ccl->cr_consumed = 1;               \
-                   r = '\r';                           \
+                   REG = '\r';                         \
                    src--;                              \
                  }                                     \
              }                                         \
            else                                        \
-             r = '\r';                                 \
+             REG = '\r';                               \
          }                                             \
-       if (r == LEADING_CODE_8_BIT_CONTROL             \
+       if (REG == LEADING_CODE_8_BIT_CONTROL           \
            && ccl->multibyte)                          \
-         r = *src++ - 0x20;                            \
+         REG = *src++ - 0x20;                          \
       }                                                        \
     else if (ccl->last_block)                          \
       {                                                        \
@@ -831,15 +861,15 @@ ccl_driver (ccl, source, destination, src_bytes, dst_bytes, consumed)
 {
   register int *reg = ccl->reg;
   register int ic = ccl->ic;
-  register int code, field1, field2;
+  register int code = 0, field1, field2;
   register Lisp_Object *ccl_prog = ccl->prog;
   unsigned char *src = source, *src_end = src + src_bytes;
   unsigned char *dst = destination, *dst_end = dst + dst_bytes;
   int jump_address;
-  int i, j, op;
+  int i = 0, j, op;
   int stack_idx = ccl->stack_idx;
   /* Instruction counter of the current CCL code. */
-  int this_ic;
+  int this_ic = 0;
   /* CCL_WRITE_CHAR will produce 8-bit code of range 0x80..0x9F.  But,
      each of them will be converted to multibyte form of 2-byte
      sequence.  For that conversion, we remember how many more bytes
@@ -849,7 +879,7 @@ ccl_driver (ccl, source, destination, src_bytes, dst_bytes, consumed)
   if (ic >= ccl->eof_ic)
     ic = CCL_HEADER_MAIN;
 
-  if (ccl->buf_magnification ==0) /* We can't produce any bytes.  */
+  if (ccl->buf_magnification == 0) /* We can't produce any bytes.  */
     dst = NULL;
 
   /* Set mapping stack pointer. */
@@ -1206,103 +1236,122 @@ ccl_driver (ccl, source, destination, src_bytes, dst_bytes, consumed)
            ic = jump_address;
          break;
 
-       case CCL_Extention:
+       case CCL_Extension:
          switch (EXCMD)
            {
            case CCL_ReadMultibyteChar2:
              if (!src)
                CCL_INVALID_CMD;
 
-             do {
-               if (src >= src_end)
-                 {
-                   src++;
-                   goto ccl_read_multibyte_character_suspend;
-                 }
+             if (src >= src_end)
+               {
+                 src++;
+                 goto ccl_read_multibyte_character_suspend;
+               }
              
-               i = *src++;
-               if (i == '\n' && ccl->eol_type != CODING_EOL_LF)
-                 {
-                   /* We are encoding.  */ 
-                   if (ccl->eol_type == CODING_EOL_CRLF)
-                     {
-                       if (ccl->cr_consumed)
-                         ccl->cr_consumed = 0;
-                       else
-                         {
-                           ccl->cr_consumed = 1;
-                           i = '\r';
-                           src--;
-                         }
-                     }
-                   else
-                     i = '\r';
-                   reg[rrr] = i;
-                   reg[RRR] = CHARSET_ASCII;
-                 }
-               else if (i < 0x80)
-                 {
-                   /* ASCII */
-                   reg[rrr] = i;
-                   reg[RRR] = CHARSET_ASCII;
-                 }
-               else if (i <= MAX_CHARSET_OFFICIAL_DIMENSION1)
-                 {
-                   if (src >= src_end)
-                     goto ccl_read_multibyte_character_suspend;
-                   reg[RRR] = i;
-                   reg[rrr] = (*src++ & 0x7F);
-                 }
-               else if (i <= MAX_CHARSET_OFFICIAL_DIMENSION2)
-                 {
-                   if ((src + 1) >= src_end)
-                     goto ccl_read_multibyte_character_suspend;
-                   reg[RRR] = i;
-                   i = (*src++ & 0x7F);
-                   reg[rrr] = ((i << 7) | (*src & 0x7F));
-                   src++;
-                 }
-               else if ((i == LEADING_CODE_PRIVATE_11)
-                        || (i == LEADING_CODE_PRIVATE_12))
-                 {
-                   if ((src + 1) >= src_end)
-                     goto ccl_read_multibyte_character_suspend;
-                   reg[RRR] = *src++;
-                   reg[rrr] = (*src++ & 0x7F);
-                 }
-               else if ((i == LEADING_CODE_PRIVATE_21)
-                        || (i == LEADING_CODE_PRIVATE_22))
-                 {
-                   if ((src + 2) >= src_end)
-                     goto ccl_read_multibyte_character_suspend;
-                   reg[RRR] = *src++;
-                   i = (*src++ & 0x7F);
-                   reg[rrr] = ((i << 7) | (*src & 0x7F));
-                   src++;
-                 }
-               else if (i == LEADING_CODE_8_BIT_CONTROL)
-                 {
-                   if (src >= src_end)
-                     goto ccl_read_multibyte_character_suspend;
-                   reg[RRR] = CHARSET_8_BIT_CONTROL;
-                   reg[rrr] = (*src++ - 0x20);
-                 }
-               else if (i >= 0xA0)
-                 {
-                   reg[RRR] = CHARSET_8_BIT_GRAPHIC;
-                   reg[rrr] = i;
-                 }
-               else
-                 {
-                   /* INVALID CODE.  Return a single byte character.  */
-                   reg[RRR] = CHARSET_ASCII;
-                   reg[rrr] = i;
-                 }
-               break;
-             } while (1);
+             if (!ccl->multibyte)
+               {
+                 int bytes;
+                 if (!UNIBYTE_STR_AS_MULTIBYTE_P (src, src_end - src, bytes))
+                   {
+                     reg[RRR] = CHARSET_8_BIT_CONTROL;
+                     reg[rrr] = *src++;
+                     break;
+                   }
+               }
+             i = *src++;
+             if (i == '\n' && ccl->eol_type != CODING_EOL_LF)
+               {
+                 /* We are encoding.  */ 
+                 if (ccl->eol_type == CODING_EOL_CRLF)
+                   {
+                     if (ccl->cr_consumed)
+                       ccl->cr_consumed = 0;
+                     else
+                       {
+                         ccl->cr_consumed = 1;
+                         i = '\r';
+                         src--;
+                       }
+                   }
+                 else
+                   i = '\r';
+                 reg[rrr] = i;
+                 reg[RRR] = CHARSET_ASCII;
+               }
+             else if (i < 0x80)
+               {
+                 /* ASCII */
+                 reg[rrr] = i;
+                 reg[RRR] = CHARSET_ASCII;
+               }
+             else if (i <= MAX_CHARSET_OFFICIAL_DIMENSION2)
+               {
+                 int dimension = BYTES_BY_CHAR_HEAD (i) - 1;
+
+                 if (dimension == 0)
+                   {
+                     /* `i' is a leading code for an undefined charset.  */
+                     reg[RRR] = CHARSET_8_BIT_GRAPHIC;
+                     reg[rrr] = i;
+                   }
+                 else if (src + dimension > src_end)
+                   goto ccl_read_multibyte_character_suspend;
+                 else
+                   {
+                     reg[RRR] = i;
+                     i = (*src++ & 0x7F);
+                     if (dimension == 1)
+                       reg[rrr] = i;
+                     else
+                       reg[rrr] = ((i << 7) | (*src++ & 0x7F));
+                   }
+               }
+             else if ((i == LEADING_CODE_PRIVATE_11)
+                      || (i == LEADING_CODE_PRIVATE_12))
+               {
+                 if ((src + 1) >= src_end)
+                   goto ccl_read_multibyte_character_suspend;
+                 reg[RRR] = *src++;
+                 reg[rrr] = (*src++ & 0x7F);
+               }
+             else if ((i == LEADING_CODE_PRIVATE_21)
+                      || (i == LEADING_CODE_PRIVATE_22))
+               {
+                 if ((src + 2) >= src_end)
+                   goto ccl_read_multibyte_character_suspend;
+                 reg[RRR] = *src++;
+                 i = (*src++ & 0x7F);
+                 reg[rrr] = ((i << 7) | (*src & 0x7F));
+                 src++;
+               }
+             else if (i == LEADING_CODE_8_BIT_CONTROL)
+               {
+                 if (src >= src_end)
+                   goto ccl_read_multibyte_character_suspend;
+                 reg[RRR] = CHARSET_8_BIT_CONTROL;
+                 reg[rrr] = (*src++ - 0x20);
+               }
+             else if (i >= 0xA0)
+               {
+                 reg[RRR] = CHARSET_8_BIT_GRAPHIC;
+                 reg[rrr] = i;
+               }
+             else
+               {
+                 /* INVALID CODE.  Return a single byte character.  */
+                 reg[RRR] = CHARSET_ASCII;
+                 reg[rrr] = i;
+               }
              break;
 
            ccl_read_multibyte_character_suspend:
+             if (src <= src_end && !ccl->multibyte && ccl->last_block)
+               {
+                 reg[RRR] = CHARSET_8_BIT_CONTROL;
+                 reg[rrr] = i;
+                 break;
+               }
              src--;
              if (ccl->last_block)
                {
@@ -1327,7 +1376,7 @@ ccl_driver (ccl, source, destination, src_bytes, dst_bytes, consumed)
              else
                i = ((i - 0xE0) << 14) | reg[rrr];
 
-             CCL_WRITE_CHAR (i);
+             CCL_WRITE_MULTIBYTE_CHAR (i);
 
              break;
 
@@ -1715,7 +1764,9 @@ ccl_driver (ccl, source, destination, src_bytes, dst_bytes, consumed)
     }
 
  ccl_error_handler:
-  if (destination)
+  /* The suppress_error member is set when e.g. a CCL-based coding
+     system is used for terminal output.  */
+  if (!ccl->suppress_error && destination)
     {
       /* We can insert an error message only if DESTINATION is
          specified and we still have a room to store the message
@@ -1774,13 +1825,33 @@ ccl_driver (ccl, source, destination, src_bytes, dst_bytes, consumed)
          bcopy (msg, dst, msglen);
          dst += msglen;
        }
+      
+      if (ccl->status == CCL_STAT_INVALID_CMD)
+       {
+#if 0 /* If the remaining bytes contain 0x80..0x9F, copying them
+        results in an invalid multibyte sequence.  */
+
+         /* Copy the remaining source data.  */
+         int i = src_end - src;
+         if (dst_bytes && (dst_end - dst) < i)
+           i = dst_end - dst;
+         bcopy (src, dst, i);
+         src += i;
+         dst += i;
+#else
+         /* Signal that we've consumed everything.  */
+         src = src_end;
+#endif
+       }
     }
 
  ccl_finish:
   ccl->ic = ic;
   ccl->stack_idx = stack_idx;
   ccl->prog = ccl_prog;
-  if (consumed) *consumed = src - source;
+  ccl->eight_bit_control = (extra_bytes > 0);
+  if (consumed)
+    *consumed = src - source;
   return (dst ? dst - destination : 0);
 }
 
@@ -1932,6 +2003,7 @@ setup_ccl_program (ccl, ccl_prog)
   ccl->status = 0;
   ccl->stack_idx = 0;
   ccl->eol_type = CODING_EOL_LF;
+  ccl->suppress_error = 0;
   return 0;
 }
 
@@ -1963,17 +2035,18 @@ DEFUN ("ccl-execute", Fccl_execute, Sccl_execute, 2, 2, 0,
   "Execute CCL-PROGRAM with registers initialized by REGISTERS.\n\
 \n\
 CCL-PROGRAM is a CCL program name (symbol)\n\
-or a compiled code generated by `ccl-compile' (for backward compatibility,\n\
-in this case, the overhead of the execution is bigger than the former case).\n\
+or compiled code generated by `ccl-compile' (for backward compatibility.\n\
+In the latter case, the execution overhead is bigger than in the former).\n\
 No I/O commands should appear in CCL-PROGRAM.\n\
 \n\
 REGISTERS is a vector of [R0 R1 ... R7] where RN is an initial value\n\
- of Nth register.\n\
+for the Nth register.\n\
 \n\
 As side effect, each element of REGISTERS holds the value of\n\
- corresponding register after the execution.\n\
+the corresponding register after the execution.\n\
 \n\
-See the documentation of `define-ccl-program' for the detail of CCL program.")
+See the documentation of `define-ccl-program' for a definition of CCL\n\
+programs.")
   (ccl_prog, reg)
      Lisp_Object ccl_prog, reg;
 {
@@ -1992,7 +2065,7 @@ See the documentation of `define-ccl-program' for the detail of CCL program.")
                  ? XINT (XVECTOR (reg)->contents[i])
                  : 0);
 
-  ccl_driver (&ccl, (char *)0, (char *)0, 0, 0, (int *)0);
+  ccl_driver (&ccl, (unsigned char *)0, (unsigned char *)0, 0, 0, (int *)0);
   QUIT;
   if (ccl.status != CCL_STAT_SUCCESS)
     error ("Error in CCL program at %dth code", ccl.ic);
@@ -2111,11 +2184,15 @@ Return index number of the registered CCL program.")
     {
       CHECK_VECTOR (ccl_prog, 1);
       resolved = resolve_symbol_ccl_program (ccl_prog);
-      if (! NILP (resolved))
+      if (NILP (resolved))
+       error ("Error in CCL program");
+      if (VECTORP (resolved))
        {
          ccl_prog = resolved;
          resolved = Qt;
        }
+      else
+       resolved = Qnil;
     }
 
   for (idx = 0; idx < len; idx++)
@@ -2165,11 +2242,11 @@ Return index number of the registered CCL program.")
 
 /* Register code conversion map.
    A code conversion map consists of numbers, Qt, Qnil, and Qlambda.
-   The first element is start code point.
-   The rest elements are mapped numbers.
+   The first element is the start code point.
+   The other elements are mapped numbers.
    Symbol t means to map to an original number before mapping.
    Symbol nil means that the corresponding element is empty.
-   Symbol lambda menas to terminate mapping here.
+   Symbol lambda means to terminate mapping here.
 */
 
 DEFUN ("register-code-conversion-map", Fregister_code_conversion_map,
@@ -2197,7 +2274,7 @@ Return index number of the registered map.")
       if (EQ (symbol, XCAR (slot)))
        {
          index = make_number (i);
-         XCDR (slot) = map;
+         XSETCDR (slot, map);
          Fput (symbol, Qcode_conversion_map, map);
          Fput (symbol, Qcode_conversion_map_id, index);
          return index;