/* CCL (Code Conversion Language) interpreter.
Copyright (C) 1995, 1997 Electrotechnical Laboratory, JAPAN.
+ Copyright (C) 2001 Free Software Foundation, Inc.
Licensed to the Free Software Foundation.
This file is part of GNU Emacs.
IC += 2;
*/
-#define CCL_Extention 0x1F /* Extended CCL code
+#define CCL_Extension 0x1F /* Extended CCL code
1:ExtendedCOMMNDRrrRRRrrrXXXXX
2:ARGUEMENT
3:...
/* If this variable is non-zero, it indicates the stack_idx
of immediately called by CCL_MapMultiple. */
-static int stack_idx_of_map_multiple = 0;
+static int stack_idx_of_map_multiple;
#define PUSH_MAPPING_STACK(restlen, orig) \
do { \
} while (0)
#define CCL_CALL_FOR_MAP_INSTRUCTION(symbol, ret_ic) \
- do { \
+if (1) \
+ { \
struct ccl_program called_ccl; \
if (stack_idx >= 256 \
|| (setup_ccl_program (&called_ccl, (symbol)) != 0)) \
ccl_prog = called_ccl.prog; \
ic = CCL_HEADER_MAIN; \
goto ccl_repeat; \
- } while (0)
+ } \
+else
#define CCL_MapSingle 0x12 /* Map by single code conversion map
1:ExtendedCOMMNDXXXRRRrrrXXXXX
r[7] = LOWER_BYTE (SJIS (Y, Z) */
/* Terminate CCL program successfully. */
-#define CCL_SUCCESS \
- do { \
+#define CCL_SUCCESS \
+if (1) \
+ { \
ccl->status = CCL_STAT_SUCCESS; \
- goto ccl_finish; \
- } while (0)
+ goto ccl_finish; \
+ } \
+else
/* Suspend CCL program because of reading from empty input buffer or
writing to full output buffer. When this program is resumed, the
same I/O command is executed. */
#define CCL_SUSPEND(stat) \
- do { \
+if (1) \
+ { \
ic--; \
ccl->status = stat; \
goto ccl_finish; \
- } while (0)
+ } \
+else
/* Terminate CCL program because of invalid command. Should not occur
in the normal case. */
#define CCL_INVALID_CMD \
- do { \
+if (1) \
+ { \
ccl->status = CCL_STAT_INVALID_CMD; \
goto ccl_error_handler; \
- } while (0)
+ } \
+else
/* Encode one character CH to multibyte form and write to the current
output buffer. If CH is less than 256, CH is written as is. */
-#define CCL_WRITE_CHAR(ch) \
- do { \
- int bytes = SINGLE_BYTE_CHAR_P (ch) ? 1: CHAR_BYTES (ch); \
- if (ch == '\n' && ccl->eol_type == CODING_EOL_CRLF) \
- bytes++; \
- if (!dst) \
- CCL_INVALID_CMD; \
- else if (dst + bytes <= (dst_bytes ? dst_end : src)) \
- { \
- if (ch == '\n') \
- { \
- if (ccl->eol_type == CODING_EOL_CRLF) \
- *dst++ = '\r', *dst++ = '\n'; \
- else if (ccl->eol_type == CODING_EOL_CR) \
- *dst++ = '\r'; \
- else \
- *dst++ = '\n'; \
- } \
- else if (bytes == 1) \
- { \
- *dst++ = (ch); \
- if ((ch) >= 0x80 && (ch) < 0xA0) \
- /* We may have to convert this eight-bit char to \
- multibyte form later. */ \
- dst_end--; \
- } \
- else \
- dst += CHAR_STRING (ch, dst); \
- } \
- else \
- CCL_SUSPEND (CCL_STAT_SUSPEND_BY_DST); \
+#define CCL_WRITE_CHAR(ch) \
+ do { \
+ int bytes = SINGLE_BYTE_CHAR_P (ch) ? 1: CHAR_BYTES (ch); \
+ if (!dst) \
+ CCL_INVALID_CMD; \
+ else if (dst + bytes + extra_bytes < (dst_bytes ? dst_end : src)) \
+ { \
+ if (bytes == 1) \
+ { \
+ *dst++ = (ch); \
+ if ((ch) >= 0x80 && (ch) < 0xA0) \
+ /* We may have to convert this eight-bit char to \
+ multibyte form later. */ \
+ extra_bytes++; \
+ } \
+ else if (CHAR_VALID_P (ch, 0)) \
+ dst += CHAR_STRING (ch, dst); \
+ else \
+ CCL_INVALID_CMD; \
+ } \
+ else \
+ CCL_SUSPEND (CCL_STAT_SUSPEND_BY_DST); \
+ } while (0)
+
+/* Encode one character CH to multibyte form and write to the current
+ output buffer. The output bytes always form a valid multibyte
+ sequence. */
+#define CCL_WRITE_MULTIBYTE_CHAR(ch) \
+ do { \
+ int bytes = CHAR_BYTES (ch); \
+ if (!dst) \
+ CCL_INVALID_CMD; \
+ else if (dst + bytes + extra_bytes < (dst_bytes ? dst_end : src)) \
+ { \
+ if (CHAR_VALID_P ((ch), 0)) \
+ dst += CHAR_STRING ((ch), dst); \
+ else \
+ CCL_INVALID_CMD; \
+ } \
+ else \
+ CCL_SUSPEND (CCL_STAT_SUSPEND_BY_DST); \
} while (0)
/* Write a string at ccl_prog[IC] of length LEN to the current output
CCL_SUSPEND (CCL_STAT_SUSPEND_BY_DST); \
} while (0)
-/* Read one byte from the current input buffer into Rth register. */
-#define CCL_READ_CHAR(r) \
- do { \
- if (!src) \
- CCL_INVALID_CMD; \
- else if (src < src_end) \
- { \
- r = *src++; \
- if (r == LEADING_CODE_8_BIT_CONTROL \
- && ccl->multibyte) \
- r = *src++ - 0x20; \
- } \
- else if (ccl->last_block) \
- { \
- ic = ccl->eof_ic; \
- goto ccl_repeat; \
- } \
- else \
- CCL_SUSPEND (CCL_STAT_SUSPEND_BY_SRC); \
+/* Read one byte from the current input buffer into the register REG. */
+#define CCL_READ_CHAR(REG) \
+ do { \
+ if (!src) \
+ CCL_INVALID_CMD; \
+ else if (src < src_end) \
+ { \
+ REG = *src++; \
+ if (REG == '\n' \
+ && ccl->eol_type != CODING_EOL_LF) \
+ { \
+ /* We are encoding. */ \
+ if (ccl->eol_type == CODING_EOL_CRLF) \
+ { \
+ if (ccl->cr_consumed) \
+ ccl->cr_consumed = 0; \
+ else \
+ { \
+ ccl->cr_consumed = 1; \
+ REG = '\r'; \
+ src--; \
+ } \
+ } \
+ else \
+ REG = '\r'; \
+ } \
+ if (REG == LEADING_CODE_8_BIT_CONTROL \
+ && ccl->multibyte) \
+ REG = *src++ - 0x20; \
+ } \
+ else if (ccl->last_block) \
+ { \
+ ic = ccl->eof_ic; \
+ goto ccl_repeat; \
+ } \
+ else \
+ CCL_SUSPEND (CCL_STAT_SUSPEND_BY_SRC); \
} while (0)
{
register int *reg = ccl->reg;
register int ic = ccl->ic;
- register int code, field1, field2;
+ register int code = 0, field1, field2;
register Lisp_Object *ccl_prog = ccl->prog;
unsigned char *src = source, *src_end = src + src_bytes;
unsigned char *dst = destination, *dst_end = dst + dst_bytes;
int jump_address;
- int i, j, op;
+ int i = 0, j, op;
int stack_idx = ccl->stack_idx;
/* Instruction counter of the current CCL code. */
- int this_ic;
+ int this_ic = 0;
+ /* CCL_WRITE_CHAR may produce 8-bit codes in the range 0x80..0x9F,
+ each of which will later be converted to a 2-byte multibyte
+ sequence. This variable counts how many extra bytes we must
+ keep free in DESTINATION for that conversion. */
+ int extra_bytes = 0;
if (ic >= ccl->eof_ic)
ic = CCL_HEADER_MAIN;
- if (ccl->buf_magnification ==0) /* We can't produce any bytes. */
+ if (ccl->buf_magnification == 0) /* We can't produce any bytes. */
dst = NULL;
/* Set mapping stack pointer. */
ic = jump_address;
break;
- case CCL_Extention:
+ case CCL_Extension:
switch (EXCMD)
{
case CCL_ReadMultibyteChar2:
if (!src)
CCL_INVALID_CMD;
- do {
- if (src >= src_end)
- {
- src++;
- goto ccl_read_multibyte_character_suspend;
- }
+ if (src >= src_end)
+ {
+ src++;
+ goto ccl_read_multibyte_character_suspend;
+ }
- i = *src++;
- if (i < 0x80)
- {
- /* ASCII */
- reg[rrr] = i;
- reg[RRR] = CHARSET_ASCII;
- }
- else if (i <= MAX_CHARSET_OFFICIAL_DIMENSION1)
- {
- if (src >= src_end)
- goto ccl_read_multibyte_character_suspend;
- reg[RRR] = i;
- reg[rrr] = (*src++ & 0x7F);
- }
- else if (i <= MAX_CHARSET_OFFICIAL_DIMENSION2)
- {
- if ((src + 1) >= src_end)
- goto ccl_read_multibyte_character_suspend;
- reg[RRR] = i;
- i = (*src++ & 0x7F);
- reg[rrr] = ((i << 7) | (*src & 0x7F));
- src++;
- }
- else if ((i == LEADING_CODE_PRIVATE_11)
- || (i == LEADING_CODE_PRIVATE_12))
- {
- if ((src + 1) >= src_end)
- goto ccl_read_multibyte_character_suspend;
- reg[RRR] = *src++;
- reg[rrr] = (*src++ & 0x7F);
- }
- else if ((i == LEADING_CODE_PRIVATE_21)
- || (i == LEADING_CODE_PRIVATE_22))
- {
- if ((src + 2) >= src_end)
- goto ccl_read_multibyte_character_suspend;
- reg[RRR] = *src++;
- i = (*src++ & 0x7F);
- reg[rrr] = ((i << 7) | (*src & 0x7F));
- src++;
- }
- else if (i == LEADING_CODE_8_BIT_CONTROL)
- {
- if (src >= src_end)
- goto ccl_read_multibyte_character_suspend;
- reg[RRR] = CHARSET_8_BIT_CONTROL;
- reg[rrr] = (*src++ - 0x20);
- }
- else if (i >= 0xA0)
- {
- reg[RRR] = CHARSET_8_BIT_GRAPHIC;
- reg[rrr] = i;
- }
- else
- {
- /* INVALID CODE. Return a single byte character. */
- reg[RRR] = CHARSET_ASCII;
- reg[rrr] = i;
- }
- break;
- } while (1);
+ if (!ccl->multibyte)
+ {
+ int bytes;
+ if (!UNIBYTE_STR_AS_MULTIBYTE_P (src, src_end - src, bytes))
+ {
+ reg[RRR] = CHARSET_8_BIT_CONTROL;
+ reg[rrr] = *src++;
+ break;
+ }
+ }
+ i = *src++;
+ if (i == '\n' && ccl->eol_type != CODING_EOL_LF)
+ {
+ /* We are encoding. */
+ if (ccl->eol_type == CODING_EOL_CRLF)
+ {
+ if (ccl->cr_consumed)
+ ccl->cr_consumed = 0;
+ else
+ {
+ ccl->cr_consumed = 1;
+ i = '\r';
+ src--;
+ }
+ }
+ else
+ i = '\r';
+ reg[rrr] = i;
+ reg[RRR] = CHARSET_ASCII;
+ }
+ else if (i < 0x80)
+ {
+ /* ASCII */
+ reg[rrr] = i;
+ reg[RRR] = CHARSET_ASCII;
+ }
+ else if (i <= MAX_CHARSET_OFFICIAL_DIMENSION2)
+ {
+ int dimension = BYTES_BY_CHAR_HEAD (i) - 1;
+
+ if (dimension == 0)
+ {
+ /* `i' is a leading code for an undefined charset. */
+ reg[RRR] = CHARSET_8_BIT_GRAPHIC;
+ reg[rrr] = i;
+ }
+ else if (src + dimension > src_end)
+ goto ccl_read_multibyte_character_suspend;
+ else
+ {
+ reg[RRR] = i;
+ i = (*src++ & 0x7F);
+ if (dimension == 1)
+ reg[rrr] = i;
+ else
+ reg[rrr] = ((i << 7) | (*src++ & 0x7F));
+ }
+ }
+ else if ((i == LEADING_CODE_PRIVATE_11)
+ || (i == LEADING_CODE_PRIVATE_12))
+ {
+ if ((src + 1) >= src_end)
+ goto ccl_read_multibyte_character_suspend;
+ reg[RRR] = *src++;
+ reg[rrr] = (*src++ & 0x7F);
+ }
+ else if ((i == LEADING_CODE_PRIVATE_21)
+ || (i == LEADING_CODE_PRIVATE_22))
+ {
+ if ((src + 2) >= src_end)
+ goto ccl_read_multibyte_character_suspend;
+ reg[RRR] = *src++;
+ i = (*src++ & 0x7F);
+ reg[rrr] = ((i << 7) | (*src & 0x7F));
+ src++;
+ }
+ else if (i == LEADING_CODE_8_BIT_CONTROL)
+ {
+ if (src >= src_end)
+ goto ccl_read_multibyte_character_suspend;
+ reg[RRR] = CHARSET_8_BIT_CONTROL;
+ reg[rrr] = (*src++ - 0x20);
+ }
+ else if (i >= 0xA0)
+ {
+ reg[RRR] = CHARSET_8_BIT_GRAPHIC;
+ reg[rrr] = i;
+ }
+ else
+ {
+ /* INVALID CODE. Return a single byte character. */
+ reg[RRR] = CHARSET_ASCII;
+ reg[rrr] = i;
+ }
break;
ccl_read_multibyte_character_suspend:
+ if (src <= src_end && !ccl->multibyte && ccl->last_block)
+ {
+ reg[RRR] = CHARSET_8_BIT_CONTROL;
+ reg[rrr] = i;
+ break;
+ }
src--;
if (ccl->last_block)
{
else
i = ((i - 0xE0) << 14) | reg[rrr];
- CCL_WRITE_CHAR (i);
+ CCL_WRITE_MULTIBYTE_CHAR (i);
break;
}
ccl_error_handler:
- if (destination)
+ /* The suppress_error member is set when e.g. a CCL-based coding
+ system is used for terminal output. */
+ if (!ccl->suppress_error && destination)
{
/* We can insert an error message only if DESTINATION is
specified and we still have a room to store the message
bcopy (msg, dst, msglen);
dst += msglen;
}
+
+ if (ccl->status == CCL_STAT_INVALID_CMD)
+ {
+#if 0 /* If the remaining bytes contain 0x80..0x9F, copying them
+ results in an invalid multibyte sequence. */
+
+ /* Copy the remaining source data. */
+ int i = src_end - src;
+ if (dst_bytes && (dst_end - dst) < i)
+ i = dst_end - dst;
+ bcopy (src, dst, i);
+ src += i;
+ dst += i;
+#else
+ /* Signal that we've consumed everything. */
+ src = src_end;
+#endif
+ }
}
ccl_finish:
ccl->ic = ic;
ccl->stack_idx = stack_idx;
ccl->prog = ccl_prog;
- if (consumed) *consumed = src - source;
+ ccl->eight_bit_control = (extra_bytes > 0);
+ if (consumed)
+ *consumed = src - source;
return (dst ? dst - destination : 0);
}
ccl->status = 0;
ccl->stack_idx = 0;
ccl->eol_type = CODING_EOL_LF;
+ ccl->suppress_error = 0;
return 0;
}
"Execute CCL-PROGRAM with registers initialized by REGISTERS.\n\
\n\
CCL-PROGRAM is a CCL program name (symbol)\n\
-or a compiled code generated by `ccl-compile' (for backward compatibility,\n\
-in this case, the overhead of the execution is bigger than the former case).\n\
+or compiled code generated by `ccl-compile' (for backward compatibility.\n\
+In the latter case, the execution overhead is bigger than in the former).\n\
No I/O commands should appear in CCL-PROGRAM.\n\
\n\
REGISTERS is a vector of [R0 R1 ... R7] where RN is an initial value\n\
- of Nth register.\n\
+for the Nth register.\n\
\n\
As side effect, each element of REGISTERS holds the value of\n\
- corresponding register after the execution.\n\
+the corresponding register after the execution.\n\
\n\
-See the documentation of `define-ccl-program' for the detail of CCL program.")
+See the documentation of `define-ccl-program' for a definition of CCL\n\
+programs.")
(ccl_prog, reg)
Lisp_Object ccl_prog, reg;
{
? XINT (XVECTOR (reg)->contents[i])
: 0);
- ccl_driver (&ccl, (char *)0, (char *)0, 0, 0, (int *)0);
+ ccl_driver (&ccl, (unsigned char *)0, (unsigned char *)0, 0, 0, (int *)0);
QUIT;
if (ccl.status != CCL_STAT_SUCCESS)
error ("Error in CCL program at %dth code", ccl.ic);
{
CHECK_VECTOR (ccl_prog, 1);
resolved = resolve_symbol_ccl_program (ccl_prog);
- if (! NILP (resolved))
+ if (NILP (resolved))
+ error ("Error in CCL program");
+ if (VECTORP (resolved))
{
ccl_prog = resolved;
resolved = Qt;
}
+ else
+ resolved = Qnil;
}
for (idx = 0; idx < len; idx++)
/* Register code conversion map.
A code conversion map consists of numbers, Qt, Qnil, and Qlambda.
- The first element is start code point.
- The rest elements are mapped numbers.
+ The first element is the start code point.
+ The other elements are mapped numbers.
Symbol t means to map to an original number before mapping.
Symbol nil means that the corresponding element is empty.
- Symbol lambda menas to terminate mapping here.
+ Symbol lambda means to terminate mapping here.
*/
DEFUN ("register-code-conversion-map", Fregister_code_conversion_map,