Lisp_Object Vsjis_coding_system;
Lisp_Object Vbig5_coding_system;
-static void record_conversion_result (struct coding_system *coding,
- enum coding_result_code result);
-static int detect_coding_utf_8 P_ ((struct coding_system *,
- struct coding_detection_info *info));
-static void decode_coding_utf_8 P_ ((struct coding_system *));
-static int encode_coding_utf_8 P_ ((struct coding_system *));
-
-static int detect_coding_utf_16 P_ ((struct coding_system *,
- struct coding_detection_info *info));
-static void decode_coding_utf_16 P_ ((struct coding_system *));
-static int encode_coding_utf_16 P_ ((struct coding_system *));
-
-static int detect_coding_iso_2022 P_ ((struct coding_system *,
- struct coding_detection_info *info));
-static void decode_coding_iso_2022 P_ ((struct coding_system *));
-static int encode_coding_iso_2022 P_ ((struct coding_system *));
-
-static int detect_coding_emacs_mule P_ ((struct coding_system *,
- struct coding_detection_info *info));
-static void decode_coding_emacs_mule P_ ((struct coding_system *));
-static int encode_coding_emacs_mule P_ ((struct coding_system *));
-
-static int detect_coding_sjis P_ ((struct coding_system *,
- struct coding_detection_info *info));
-static void decode_coding_sjis P_ ((struct coding_system *));
-static int encode_coding_sjis P_ ((struct coding_system *));
-
-static int detect_coding_big5 P_ ((struct coding_system *,
- struct coding_detection_info *info));
-static void decode_coding_big5 P_ ((struct coding_system *));
-static int encode_coding_big5 P_ ((struct coding_system *));
-
-static int detect_coding_ccl P_ ((struct coding_system *,
- struct coding_detection_info *info));
-static void decode_coding_ccl P_ ((struct coding_system *));
-static int encode_coding_ccl P_ ((struct coding_system *));
-
-static void decode_coding_raw_text P_ ((struct coding_system *));
-static int encode_coding_raw_text P_ ((struct coding_system *));
-
-
/* ISO2022 section */
#define CODING_ISO_INITIAL(coding, reg) \
} while (0)
+/* Prototypes for static functions. */
+static void record_conversion_result P_ ((struct coding_system *coding,
+ enum coding_result_code result));
+static int detect_coding_utf_8 P_ ((struct coding_system *,
+ struct coding_detection_info *info));
+static void decode_coding_utf_8 P_ ((struct coding_system *));
+static int encode_coding_utf_8 P_ ((struct coding_system *));
+
+static int detect_coding_utf_16 P_ ((struct coding_system *,
+ struct coding_detection_info *info));
+static void decode_coding_utf_16 P_ ((struct coding_system *));
+static int encode_coding_utf_16 P_ ((struct coding_system *));
+
+static int detect_coding_iso_2022 P_ ((struct coding_system *,
+ struct coding_detection_info *info));
+static void decode_coding_iso_2022 P_ ((struct coding_system *));
+static int encode_coding_iso_2022 P_ ((struct coding_system *));
+
+static int detect_coding_emacs_mule P_ ((struct coding_system *,
+ struct coding_detection_info *info));
+static void decode_coding_emacs_mule P_ ((struct coding_system *));
+static int encode_coding_emacs_mule P_ ((struct coding_system *));
+
+static int detect_coding_sjis P_ ((struct coding_system *,
+ struct coding_detection_info *info));
+static void decode_coding_sjis P_ ((struct coding_system *));
+static int encode_coding_sjis P_ ((struct coding_system *));
+
+static int detect_coding_big5 P_ ((struct coding_system *,
+ struct coding_detection_info *info));
+static void decode_coding_big5 P_ ((struct coding_system *));
+static int encode_coding_big5 P_ ((struct coding_system *));
+
+static int detect_coding_ccl P_ ((struct coding_system *,
+ struct coding_detection_info *info));
+static void decode_coding_ccl P_ ((struct coding_system *));
+static int encode_coding_ccl P_ ((struct coding_system *));
+
+static void decode_coding_raw_text P_ ((struct coding_system *));
+static int encode_coding_raw_text P_ ((struct coding_system *));
+
+static void coding_set_source P_ ((struct coding_system *));
+static void coding_set_destination P_ ((struct coding_system *));
+static void coding_alloc_by_realloc P_ ((struct coding_system *, EMACS_INT));
+static void coding_alloc_by_making_gap P_ ((struct coding_system *,
+ EMACS_INT));
+static unsigned char *alloc_destination P_ ((struct coding_system *,
+ EMACS_INT, unsigned char *));
+static void setup_iso_safe_charsets P_ ((Lisp_Object));
+static unsigned char *encode_designation_at_bol P_ ((struct coding_system *,
+ int *, int *,
+ unsigned char *));
+static int detect_eol P_ ((const unsigned char *,
+ EMACS_INT, enum coding_category));
+static Lisp_Object adjust_coding_eol_type P_ ((struct coding_system *, int));
+static void decode_eol P_ ((struct coding_system *));
+static Lisp_Object get_translation_table P_ ((Lisp_Object, int, int *));
+static Lisp_Object get_translation P_ ((Lisp_Object, int *, int *,
+ int, int *, int *));
+static int produce_chars P_ ((struct coding_system *, Lisp_Object, int));
+static INLINE void produce_composition P_ ((struct coding_system *, int *,
+ EMACS_INT));
+static INLINE void produce_charset P_ ((struct coding_system *, int *,
+ EMACS_INT));
+static void produce_annotation P_ ((struct coding_system *, EMACS_INT));
+static int decode_coding P_ ((struct coding_system *));
+static INLINE int *handle_composition_annotation P_ ((EMACS_INT, EMACS_INT,
+ struct coding_system *,
+ int *, EMACS_INT *));
+static INLINE int *handle_charset_annotation P_ ((EMACS_INT, EMACS_INT,
+ struct coding_system *,
+ int *, EMACS_INT *));
+static void consume_chars P_ ((struct coding_system *, Lisp_Object, int));
+static int encode_coding P_ ((struct coding_system *));
+static Lisp_Object make_conversion_work_buffer P_ ((int));
+static Lisp_Object code_conversion_restore P_ ((Lisp_Object));
+static INLINE int char_encodable_p P_ ((int, Lisp_Object));
+static Lisp_Object make_subsidiaries P_ ((Lisp_Object));
+
static void
record_conversion_result (struct coding_system *coding,
enum coding_result_code result)
{
ASSURE_DESTINATION (safe_room);
c = *charbuf++;
- dst += CHAR_STRING (c, dst);
+ if (CHAR_BYTE8_P (c))
+ *dst++ = CHAR_TO_BYTE8 (c);
+ else
+ dst += CHAR_STRING (c, dst);
produced_chars++;
}
}
}
else if (c1 >= 0 && c2 >= 0)
{
- unsigned char b1[256], b2[256];
- int b1_variants = 1, b2_variants = 1;
- int n;
-
- bzero (b1, 256), bzero (b2, 256);
- b1[c1]++, b2[c2]++;
- for (n = 0; n < 256 && src < src_end; n++)
- {
- src_base = src;
- ONE_MORE_BYTE (c1);
- ONE_MORE_BYTE (c2);
- if (c1 < 0 || c2 < 0)
- break;
- if (! b1[c1++]) b1_variants++;
- if (! b2[c2++]) b2_variants++;
- }
- if (b1_variants < b2_variants)
- detect_info->found |= CATEGORY_MASK_UTF_16_BE_NOSIG;
- else
- detect_info->found |= CATEGORY_MASK_UTF_16_LE_NOSIG;
detect_info->rejected
|= (CATEGORY_MASK_UTF_16_BE | CATEGORY_MASK_UTF_16_LE);
}
if (! (charset = emacs_mule_charset[c]))
goto invalid_code;
ONE_MORE_BYTE (c);
- if (c < 0)
+ if (c < 0xA0)
goto invalid_code;
code = c & 0x7F;
break;
|| c == EMACS_MULE_LEADING_CODE_PRIVATE_12)
{
ONE_MORE_BYTE (c);
- if (c < 0 || ! (charset = emacs_mule_charset[c]))
+ if (c < 0xA0 || ! (charset = emacs_mule_charset[c]))
goto invalid_code;
ONE_MORE_BYTE (c);
- if (c < 0)
+ if (c < 0xA0)
goto invalid_code;
code = c & 0x7F;
}
if (! (charset = emacs_mule_charset[c]))
goto invalid_code;
ONE_MORE_BYTE (c);
- if (c < 0)
+ if (c < 0xA0)
goto invalid_code;
code = (c & 0x7F) << 8;
ONE_MORE_BYTE (c);
- if (c < 0)
+ if (c < 0xA0)
goto invalid_code;
code |= c & 0x7F;
}
if (c < 0 || ! (charset = emacs_mule_charset[c]))
goto invalid_code;
ONE_MORE_BYTE (c);
- if (c < 0)
+ if (c < 0xA0)
goto invalid_code;
code = (c & 0x7F) << 8;
ONE_MORE_BYTE (c);
- if (c < 0)
+ if (c < 0xA0)
goto invalid_code;
code |= c & 0x7F;
break;
}
-/* Set designation state into CODING. */
+/* Set designation state into CODING. Set CHARS_96 to -1 if the
+ escape sequence should be kept. */
#define DECODE_DESIGNATION(reg, dim, chars_96, final) \
do { \
int id, prev; \
|| !SAFE_CHARSET_P (coding, id)) \
{ \
CODING_ISO_DESIGNATION (coding, reg) = -2; \
- goto invalid_code; \
+ chars_96 = -1; \
+ break; \
} \
prev = CODING_ISO_DESIGNATION (coding, reg); \
if (id == charset_jisx0201_roman) \
designation is ASCII to REG, we should keep this designation \
sequence. */ \
if (prev == -2 && id == charset_ascii) \
- goto invalid_code; \
+ chars_96 = -1; \
} while (0)
/* Charsets invoked to graphic plane 0 and 1 respectively. */
int charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
int charset_id_1 = CODING_ISO_INVOKED_CHARSET (coding, 1);
+ int charset_id_2, charset_id_3;
struct charset *charset;
int c;
/* For handling composition sequence. */
continue;
}
}
- charset = CHARSET_FROM_ID (charset_id_0);
+ if (charset_id_0 < 0)
+ charset = CHARSET_FROM_ID (charset_ascii);
+ else
+ charset = CHARSET_FROM_ID (charset_id_0);
break;
case ISO_0xA0_or_0xFF:
case '$': /* designation of 2-byte character set */
if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DESIGNATION))
goto invalid_code;
- ONE_MORE_BYTE (c1);
- if (c1 >= '@' && c1 <= 'B')
- { /* designation of JISX0208.1978, GB2312.1980,
+ {
+ int reg, chars96;
+
+ ONE_MORE_BYTE (c1);
+ if (c1 >= '@' && c1 <= 'B')
+ { /* designation of JISX0208.1978, GB2312.1980,
or JISX0208.1980 */
- DECODE_DESIGNATION (0, 2, 0, c1);
- }
- else if (c1 >= 0x28 && c1 <= 0x2B)
- { /* designation of DIMENSION2_CHARS94 character set */
- ONE_MORE_BYTE (c2);
- DECODE_DESIGNATION (c1 - 0x28, 2, 0, c2);
- }
- else if (c1 >= 0x2C && c1 <= 0x2F)
- { /* designation of DIMENSION2_CHARS96 character set */
- ONE_MORE_BYTE (c2);
- DECODE_DESIGNATION (c1 - 0x2C, 2, 1, c2);
- }
- else
- goto invalid_code;
- /* We must update these variables now. */
- charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
- charset_id_1 = CODING_ISO_INVOKED_CHARSET (coding, 1);
+ reg = 0, chars96 = 0;
+ }
+ else if (c1 >= 0x28 && c1 <= 0x2B)
+ { /* designation of DIMENSION2_CHARS94 character set */
+ reg = c1 - 0x28, chars96 = 0;
+ ONE_MORE_BYTE (c1);
+ }
+ else if (c1 >= 0x2C && c1 <= 0x2F)
+ { /* designation of DIMENSION2_CHARS96 character set */
+ reg = c1 - 0x2C, chars96 = 1;
+ ONE_MORE_BYTE (c1);
+ }
+ else
+ goto invalid_code;
+ DECODE_DESIGNATION (reg, 2, chars96, c1);
+ /* We must update these variables now. */
+ if (reg == 0)
+ charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
+ else if (reg == 1)
+ charset_id_1 = CODING_ISO_INVOKED_CHARSET (coding, 1);
+ if (chars96 < 0)
+ goto invalid_code;
+ }
continue;
case 'n': /* invocation of locking-shift-2 */
if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT)
|| CODING_ISO_DESIGNATION (coding, 2) < 0)
goto invalid_code;
- charset = CHARSET_FROM_ID (CODING_ISO_DESIGNATION (coding, 2));
+ charset_id_2 = CODING_ISO_DESIGNATION (coding, 2);
+ if (charset_id_2 < 0)
+ charset = CHARSET_FROM_ID (charset_ascii);
+ else
+ charset = CHARSET_FROM_ID (charset_id_2);
ONE_MORE_BYTE (c1);
if (c1 < 0x20 || (c1 >= 0x80 && c1 < 0xA0))
goto invalid_code;
if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT)
|| CODING_ISO_DESIGNATION (coding, 3) < 0)
goto invalid_code;
- charset = CHARSET_FROM_ID (CODING_ISO_DESIGNATION (coding, 3));
+ charset_id_3 = CODING_ISO_DESIGNATION (coding, 3);
+ if (charset_id_3 < 0)
+ charset = CHARSET_FROM_ID (charset_ascii);
+ else
+ charset = CHARSET_FROM_ID (charset_id_3);
ONE_MORE_BYTE (c1);
if (c1 < 0x20 || (c1 >= 0x80 && c1 < 0xA0))
goto invalid_code;
&& src + 1 < src_end
&& src[0] == '%'
&& src[1] == '@')
- break;
+ {
+ src += 2;
+ break;
+ }
*p++ = ASCII_BYTE_P (c1) ? c1 : BYTE8_TO_CHAR (c1);
}
if (p + 3 > charbuf_end)
default:
if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DESIGNATION))
goto invalid_code;
- if (c1 >= 0x28 && c1 <= 0x2B)
- { /* designation of DIMENSION1_CHARS94 character set */
- ONE_MORE_BYTE (c2);
- DECODE_DESIGNATION (c1 - 0x28, 1, 0, c2);
- }
- else if (c1 >= 0x2C && c1 <= 0x2F)
- { /* designation of DIMENSION1_CHARS96 character set */
- ONE_MORE_BYTE (c2);
- DECODE_DESIGNATION (c1 - 0x2C, 1, 1, c2);
- }
- else
- goto invalid_code;
- /* We must update these variables now. */
- charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
- charset_id_1 = CODING_ISO_INVOKED_CHARSET (coding, 1);
+ {
+ int reg, chars96;
+
+ if (c1 >= 0x28 && c1 <= 0x2B)
+ { /* designation of DIMENSION1_CHARS94 character set */
+ reg = c1 - 0x28, chars96 = 0;
+ ONE_MORE_BYTE (c1);
+ }
+ else if (c1 >= 0x2C && c1 <= 0x2F)
+ { /* designation of DIMENSION1_CHARS96 character set */
+ reg = c1 - 0x2C, chars96 = 1;
+ ONE_MORE_BYTE (c1);
+ }
+ else
+ goto invalid_code;
+ DECODE_DESIGNATION (reg, 1, chars96, c1);
+ /* We must update these variables now. */
+ if (reg == 0)
+ charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
+ else if (reg == 1)
+ charset_id_1 = CODING_ISO_INVOKED_CHARSET (coding, 1);
+ if (chars96 < 0)
+ goto invalid_code;
+ }
continue;
}
}
coding->detector = NULL;
coding->decoder = decode_coding_raw_text;
coding->encoder = encode_coding_raw_text;
+ if (! EQ (eol_type, Qunix))
+ {
+ coding->common_flags |= CODING_REQUIRE_DECODING_MASK;
+ if (! VECTORP (eol_type))
+ coding->common_flags |= CODING_REQUIRE_ENCODING_MASK;
+ }
+
}
return;
static int
detect_eol (source, src_bytes, category)
- unsigned char *source;
+ const unsigned char *source;
EMACS_INT src_bytes;
enum coding_category category;
{
- unsigned char *src = source, *src_end = src + src_bytes;
+ const unsigned char *src = source, *src_end = src + src_bytes;
unsigned char c;
int total = 0;
int eol_seen = EOL_SEEN_NONE;
struct coding_system *coding;
{
const unsigned char *src, *src_end;
- Lisp_Object attrs, coding_type;
coding->consumed = coding->consumed_char = 0;
coding->produced = coding->produced_char = 0;
if (EQ (CODING_ATTR_TYPE (CODING_ID_ATTRS (coding->id)), Qundecided))
{
int c, i;
+ struct coding_detection_info detect_info;
+ detect_info.checked = detect_info.found = detect_info.rejected = 0;
for (i = 0, src = coding->source; src < src_end; i++, src++)
{
c = *src;
- if (c & 0x80 || (c < 0x20 && (c == ISO_CODE_ESC
- || c == ISO_CODE_SI
- || c == ISO_CODE_SO)))
+ if (c & 0x80)
break;
+ if (c < 0x20
+ && (c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO)
+ && ! inhibit_iso_escape_detection
+ && ! detect_info.checked)
+ {
+ coding->head_ascii = src - (coding->source + coding->consumed);
+ if (detect_coding_iso_2022 (coding, &detect_info))
+ {
+ /* We have scanned the whole data. */
+ if (! (detect_info.rejected & CATEGORY_MASK_ISO_7_ELSE))
+ /* We didn't find an 8-bit code. */
+ src = src_end;
+ break;
+ }
+ }
}
coding->head_ascii = src - (coding->source + coding->consumed);
- if (coding->head_ascii < coding->src_bytes)
+ if (coding->head_ascii < coding->src_bytes
+ || detect_info.found)
{
- struct coding_detection_info detect_info;
enum coding_category category;
struct coding_system *this;
- detect_info.checked = detect_info.found = detect_info.rejected = 0;
- for (i = 0; i < coding_category_raw_text; i++)
- {
- category = coding_priorities[i];
- this = coding_categories + category;
- if (this->id < 0)
- {
- /* No coding system of this category is defined. */
- detect_info.rejected |= (1 << category);
- }
- else if (category >= coding_category_raw_text)
- continue;
- else if (detect_info.checked & (1 << category))
- {
- if (detect_info.found & (1 << category))
- break;
- }
- else if ((*(this->detector)) (coding, &detect_info)
- && detect_info.found & (1 << category))
- {
- if (category == coding_category_utf_16_auto)
- {
- if (detect_info.found & CATEGORY_MASK_UTF_16_LE)
- category = coding_category_utf_16_le;
- else
- category = coding_category_utf_16_be;
- }
+ if (coding->head_ascii == coding->src_bytes)
+ /* As all bytes are 7-bit, we can ignore non-ISO-2022 codings. */
+ for (i = 0; i < coding_category_raw_text; i++)
+ {
+ category = coding_priorities[i];
+ this = coding_categories + category;
+ if (detect_info.found & (1 << category))
break;
- }
- }
+ }
+ else
+ for (i = 0; i < coding_category_raw_text; i++)
+ {
+ category = coding_priorities[i];
+ this = coding_categories + category;
+ if (this->id < 0)
+ {
+ /* No coding system of this category is defined. */
+ detect_info.rejected |= (1 << category);
+ }
+ else if (category >= coding_category_raw_text)
+ continue;
+ else if (detect_info.checked & (1 << category))
+ {
+ if (detect_info.found & (1 << category))
+ break;
+ }
+ else if ((*(this->detector)) (coding, &detect_info)
+ && detect_info.found & (1 << category))
+ {
+ if (category == coding_category_utf_16_auto)
+ {
+ if (detect_info.found & CATEGORY_MASK_UTF_16_LE)
+ category = coding_category_utf_16_le;
+ else
+ category = coding_category_utf_16_be;
+ }
+ break;
+ }
+ }
+
if (i < coding_category_raw_text)
setup_coding_system (CODING_ID_NAME (this->id), coding);
else if (detect_info.rejected == CATEGORY_MASK_ANY)
attribute vector ATTRS for encoding (ENCODEP is nonzero) or
decoding (ENCODEP is zero). */
-static INLINE
-get_translation_table (attrs, encodep)
+static Lisp_Object
+get_translation_table (attrs, encodep, max_lookup)
+ Lisp_Object attrs;
+ int encodep, *max_lookup;
{
Lisp_Object standard, translation_table;
+ Lisp_Object val;
if (encodep)
translation_table = CODING_ATTR_ENCODE_TBL (attrs),
translation_table = CODING_ATTR_DECODE_TBL (attrs),
standard = Vstandard_translation_table_for_decode;
if (NILP (translation_table))
- return standard;
- if (SYMBOLP (translation_table))
- translation_table = Fget (translation_table, Qtranslation_table);
- else if (CONSP (translation_table))
+ translation_table = standard;
+ else
{
- Lisp_Object val;
-
- translation_table = Fcopy_sequence (translation_table);
- for (val = translation_table; CONSP (val); val = XCDR (val))
- if (SYMBOLP (XCAR (val)))
- XSETCAR (val, Fget (XCAR (val), Qtranslation_table));
+ if (SYMBOLP (translation_table))
+ translation_table = Fget (translation_table, Qtranslation_table);
+ else if (CONSP (translation_table))
+ {
+ translation_table = Fcopy_sequence (translation_table);
+ for (val = translation_table; CONSP (val); val = XCDR (val))
+ if (SYMBOLP (XCAR (val)))
+ XSETCAR (val, Fget (XCAR (val), Qtranslation_table));
+ }
+ if (CHAR_TABLE_P (standard))
+ {
+ if (CONSP (translation_table))
+ translation_table = nconc2 (translation_table,
+ Fcons (standard, Qnil));
+ else
+ translation_table = Fcons (translation_table,
+ Fcons (standard, Qnil));
+ }
}
- if (! NILP (standard))
+
+ if (max_lookup)
{
- if (CONSP (translation_table))
- translation_table = nconc2 (translation_table, Fcons (standard, Qnil));
- else
- translation_table = Fcons (translation_table, Fcons (standard, Qnil));
+ *max_lookup = 1;
+ if (CHAR_TABLE_P (translation_table)
+ && CHAR_TABLE_EXTRA_SLOTS (XCHAR_TABLE (translation_table)) > 1)
+ {
+ val = XCHAR_TABLE (translation_table)->extras[1];
+ if (NATNUMP (val) && *max_lookup < XFASTINT (val))
+ *max_lookup = XFASTINT (val);
+ }
+ else if (CONSP (translation_table))
+ {
+ Lisp_Object tail, val;
+
+ for (tail = translation_table; CONSP (tail); tail = XCDR (tail))
+ if (CHAR_TABLE_P (XCAR (tail))
+ && CHAR_TABLE_EXTRA_SLOTS (XCHAR_TABLE (XCAR (tail))) > 1)
+ {
+ val = XCHAR_TABLE (XCAR (tail))->extras[1];
+ if (NATNUMP (val) && *max_lookup < XFASTINT (val))
+ *max_lookup = XFASTINT (val);
+ }
+ }
}
return translation_table;
}
/* Look up character C in TABLE and leave the outcome in C and TRANS.
   TABLE may be a char-table, or a list whose char-table elements are
   consulted in order; for any other TABLE, C is unchanged and TRANS
   is Qnil.
   When a lookup yields a character, C is replaced by that character
   and TRANS is reset to Qnil; in the list case the next table is then
   consulted with the new C.  When a lookup yields some other non-nil
   value, TRANS keeps that value and, in the list case, the scan stops.
   C and TRANS must be assignable lvalues.  TABLE and C are evaluated
   more than once, and the expansion declares a local named `tail'.  */
+#define LOOKUP_TRANSLATION_TABLE(table, c, trans) \
+ do { \
+ trans = Qnil; \
+ if (CHAR_TABLE_P (table)) \
+ { \
+ trans = CHAR_TABLE_REF (table, c); \
+ if (CHARACTERP (trans)) \
+ c = XFASTINT (trans), trans = Qnil; \
+ } \
+ else if (CONSP (table)) \
+ { \
+ Lisp_Object tail; \
+ \
+ for (tail = table; CONSP (tail); tail = XCDR (tail)) \
+ if (CHAR_TABLE_P (XCAR (tail))) \
+ { \
+ trans = CHAR_TABLE_REF (XCAR (tail), c); \
+ if (CHARACTERP (trans)) \
+ c = XFASTINT (trans), trans = Qnil; \
+ else if (! NILP (trans)) \
+ break; \
+ } \
+ } \
+ } while (0)
-static void
-translate_chars (coding, table)
- struct coding_system *coding;
- Lisp_Object table;
-{
- int *charbuf = coding->charbuf;
- int *charbuf_end = charbuf + coding->charbuf_used;
- int c;
-
- if (coding->chars_at_source)
- return;
-
- while (charbuf < charbuf_end)
- {
- c = *charbuf;
- if (c < 0)
- charbuf += -c;
- else
- *charbuf++ = translate_char (table, c);
- }
-}
static Lisp_Object
get_translation (val, buf, buf_end, last_block, from_nchars, to_nchars)
int last_block;
int *from_nchars, *to_nchars;
{
- /* VAL is TO-CHAR, [TO-CHAR ...], ([FROM-CHAR ...] . TO-CHAR), or
- ([FROM-CHAR ...] . [TO-CHAR ...]). */
+ /* VAL is TO or (([FROM-CHAR ...] . TO) ...) where TO is TO-CHAR or
+ [TO-CHAR ...]. */
if (CONSP (val))
{
- Lisp_Object from;
+ Lisp_Object from, tail;
int i, len;
- from = XCAR (val);
- val = XCDR (val);
- len = ASIZE (from);
- for (i = 0; i < len; i++)
+ for (tail = val; CONSP (tail); tail = XCDR (tail))
{
- if (buf + i == buf_end)
- return (last_block ? Qnil : Qt);
- if (XINT (AREF (from, i)) != buf[i])
- return Qnil;
+ val = XCAR (tail);
+ from = XCAR (val);
+ len = ASIZE (from);
+ for (i = 0; i < len; i++)
+ {
+ if (buf + i == buf_end)
+ {
+ if (! last_block)
+ return Qt;
+ break;
+ }
+ if (XINT (AREF (from, i)) != buf[i])
+ break;
+ }
+ if (i == len)
+ {
+ val = XCDR (val);
+ *from_nchars = len;
+ break;
+ }
}
- *from_nchars = len;
+ if (! CONSP (tail))
+ return Qnil;
}
if (VECTORP (val))
*buf = XINT (AREF (val, 0)), *to_nchars = ASIZE (val);
int from_nchars = 1, to_nchars = 1;
Lisp_Object trans = Qnil;
- if (! NILP (translation_table)
- && ! NILP (trans = CHAR_TABLE_REF (translation_table, c)))
+ LOOKUP_TRANSLATION_TABLE (translation_table, c, trans);
+ if (! NILP (trans))
{
trans = get_translation (trans, buf, buf_end, last_block,
&from_nchars, &to_nchars);
dst_end = coding->destination + coding->dst_bytes;
}
- for (i = 0; i < to_nchars; i++, c = XINT (AREF (trans, i)))
+ for (i = 0; i < to_nchars; i++)
{
+ if (i > 0)
+ c = XINT (AREF (trans, i));
if (coding->dst_multibyte
|| ! CHAR_BYTE8_P (c))
CHAR_STRING_ADVANCE (c, dst);
len = -charbuf[0];
to = pos + charbuf[2];
+ if (to <= pos)
+ return;
method = (enum composition_method) (charbuf[3]);
if (method == COMPOSITION_RELATIVE)
components = Qnil;
- else
+ else if (method >= COMPOSITION_WITH_RULE
+ && method <= COMPOSITION_WITH_RULE_ALTCHARS)
{
Lisp_Object args[MAX_COMPOSITION_COMPONENTS * 2 - 1];
int i;
len -= 4;
charbuf += 4;
for (i = 0; i < len; i++)
- args[i] = make_number (charbuf[i]);
+ {
+ args[i] = make_number (charbuf[i]);
+ /* Reject a negative component by testing the raw int; args[i]
+ is a boxed Lisp_Object and must not be compared with 0. */
+ if (charbuf[i] < 0)
+ return;
+ }
components = (method == COMPOSITION_WITH_ALTCHARS
? Fstring (len, args) : Fvector (len, args));
}
+ else
+ return;
compose_text (pos, to, components, Qnil, coding->dst_object);
}
ALLOC_CONVERSION_WORK_AREA (coding);
attrs = CODING_ID_ATTRS (coding->id);
- translation_table = get_translation_table (attrs, 0);
+ translation_table = get_translation_table (attrs, 0, NULL);
carryover = 0;
do
coding->charbuf[coding->charbuf_used++] = (c & 0x80 ? - c : c);
}
- produce_chars (coding);
+ produce_chars (coding, Qnil, 1);
}
else
{
static void
-consume_chars (coding)
+consume_chars (coding, translation_table, max_lookup)
struct coding_system *coding;
+ Lisp_Object translation_table;
+ int max_lookup;
{
int *buf = coding->charbuf;
int *buf_end = coding->charbuf + coding->charbuf_size;
Lisp_Object eol_type;
int c;
EMACS_INT stop, stop_composition, stop_charset;
+ int *lookup_buf = NULL;
+
+ if (! NILP (translation_table))
+ lookup_buf = alloca (sizeof (int) * max_lookup);
eol_type = CODING_ID_EOL_TYPE (coding->id);
if (VECTORP (eol_type))
buf_end -= 1 + MAX_ANNOTATION_LENGTH;
while (buf < buf_end)
{
+ Lisp_Object trans;
+
if (pos == stop)
{
if (pos == end_pos)
{
EMACS_INT bytes;
- if (! CODING_FOR_UNIBYTE (coding)
- && (bytes = MULTIBYTE_LENGTH (src, src_end)) > 0)
+ if (coding->encoder == encode_coding_raw_text)
+ c = *src++, pos++;
+ else if ((bytes = MULTIBYTE_LENGTH (src, src_end)) > 0)
c = STRING_CHAR_ADVANCE (src), pos += bytes;
else
- c = *src++, pos++;
+ c = BYTE8_TO_CHAR (*src), src++, pos++;
}
else
c = STRING_CHAR_ADVANCE (src), pos++;
c = '\r';
}
}
- *buf++ = c;
+
+ trans = Qnil;
+ LOOKUP_TRANSLATION_TABLE (translation_table, c, trans);
+ if (NILP (trans))
+ *buf++ = c;
+ else
+ {
+ int from_nchars = 1, to_nchars = 1;
+ int *lookup_buf_end;
+ const unsigned char *p = src;
+ int i;
+
+ lookup_buf[0] = c;
+ for (i = 1; i < max_lookup && p < src_end; i++)
+ lookup_buf[i] = STRING_CHAR_ADVANCE (p);
+ lookup_buf_end = lookup_buf + i;
+ trans = get_translation (trans, lookup_buf, lookup_buf_end, 1,
+ &from_nchars, &to_nchars);
+ if (EQ (trans, Qt)
+ || buf + to_nchars > buf_end)
+ break;
+ *buf++ = *lookup_buf;
+ for (i = 1; i < to_nchars; i++)
+ *buf++ = XINT (AREF (trans, i));
+ for (i = 1; i < from_nchars; i++, pos++)
+ src += MULTIBYTE_LENGTH_NO_CHECK (src);
+ }
}
coding->consumed = src - coding->source;
{
Lisp_Object attrs;
Lisp_Object translation_table;
+ int max_lookup;
attrs = CODING_ID_ATTRS (coding->id);
- translation_table = get_translation_table (attrs, 1);
+ if (coding->encoder == encode_coding_raw_text)
+ translation_table = Qnil, max_lookup = 0;
+ else
+ translation_table = get_translation_table (attrs, 1, &max_lookup);
if (BUFFERP (coding->dst_object))
{
do {
coding_set_source (coding);
- consume_chars (coding);
-
- if (!NILP (translation_table))
- translate_chars (coding, translation_table);
-
+ consume_chars (coding, translation_table, max_lookup);
coding_set_destination (coding);
(*(coding->encoder)) (coding);
} while (coding->consumed_char < coding->src_chars);
/* Return a working buffer for code conversion. MULTIBYTE specifies the
multibyteness of the returned buffer. */
-Lisp_Object
+static Lisp_Object
make_conversion_work_buffer (multibyte)
+ int multibyte;
{
Lisp_Object name, workbuf;
struct buffer *current;
DEFUN ("check-coding-system", Fcheck_coding_system, Scheck_coding_system,
1, 1, 0,
doc: /* Check validity of CODING-SYSTEM.
-If valid, return CODING-SYSTEM, else signal a `coding-system-error' error. */)
+If valid, return CODING-SYSTEM, else signal a `coding-system-error' error.
+It is valid if it is nil or a symbol defined as a coding system by the
+function `define-coding-system'. */)
(coding_system)
Lisp_Object coding_system;
{
for (i = 0; src < src_end; i++, src++)
{
c = *src;
- if (c & 0x80 || (c < 0x20 && (c == ISO_CODE_ESC
- || c == ISO_CODE_SI
- || c == ISO_CODE_SO)))
+ if (c & 0x80)
break;
+ /* Run the ISO-2022 detector on an escape or shift code only when
+ escape-sequence detection is NOT inhibited. */
+ if (c < 0x20
+ && (c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO)
+ && ! inhibit_iso_escape_detection)
+ {
+ coding.head_ascii = src - coding.source;
+ if (detect_coding_iso_2022 (&coding, &detect_info))
+ {
+ /* We have scanned the whole data. */
+ if (! (detect_info.rejected & CATEGORY_MASK_ISO_7_ELSE))
+ /* We didn't find an 8-bit code. */
+ src = src_end;
+ break;
+ }
+ }
}
coding.head_ascii = src - coding.source;
- if (src < src_end)
- for (i = 0; i < coding_category_raw_text; i++)
- {
- category = coding_priorities[i];
- this = coding_categories + category;
-
- if (this->id < 0)
- {
- /* No coding system of this category is defined. */
- detect_info.rejected |= (1 << category);
- }
- else if (category >= coding_category_raw_text)
- continue;
- else if (detect_info.checked & (1 << category))
+ if (src < src_end
+ || detect_info.found)
+ {
+ if (src == src_end)
+ /* As all bytes are 7-bit, we can ignore non-ISO-2022 codings. */
+ for (i = 0; i < coding_category_raw_text; i++)
{
- if (highest
- && (detect_info.found & (1 << category)))
+ category = coding_priorities[i];
+ if (detect_info.found & (1 << category))
break;
}
- else
+ else
+ for (i = 0; i < coding_category_raw_text; i++)
{
- if ((*(this->detector)) (&coding, &detect_info)
- && highest
- && (detect_info.found & (1 << category)))
+ category = coding_priorities[i];
+ this = coding_categories + category;
+
+ if (this->id < 0)
{
- if (category == coding_category_utf_16_auto)
+ /* No coding system of this category is defined. */
+ detect_info.rejected |= (1 << category);
+ }
+ else if (category >= coding_category_raw_text)
+ continue;
+ else if (detect_info.checked & (1 << category))
+ {
+ if (highest
+ && (detect_info.found & (1 << category)))
+ break;
+ }
+ else
+ {
+ if ((*(this->detector)) (&coding, &detect_info)
+ && highest
+ && (detect_info.found & (1 << category)))
{
- if (detect_info.found & CATEGORY_MASK_UTF_16_LE)
- category = coding_category_utf_16_le;
- else
- category = coding_category_utf_16_be;
+ if (category == coding_category_utf_16_auto)
+ {
+ if (detect_info.found & CATEGORY_MASK_UTF_16_LE)
+ category = coding_category_utf_16_le;
+ else
+ category = coding_category_utf_16_be;
+ }
+ break;
}
- break;
}
}
- }
+ }
if (detect_info.rejected == CATEGORY_MASK_ANY)
{
{
if (detect_coding_utf_16 (&coding, &detect_info))
{
- enum coding_category category;
struct coding_system *this;
if (detect_info.found & CATEGORY_MASK_UTF_16_LE)
&& ! EQ (CODING_ATTR_TYPE (attrs), Qundecided))
{
ASET (attrs, coding_attr_trans_tbl,
- get_translation_table (attrs, 1));
+ get_translation_table (attrs, 1, NULL));
coding_attrs_list = Fcons (attrs, coding_attrs_list);
}
}
}
}
- safe_codings = Qnil;
+ safe_codings = list2 (Qraw_text, Qno_conversion);
for (tail = coding_attrs_list; CONSP (tail); tail = XCDR (tail))
if (! NILP (XCAR (tail)))
safe_codings = Fcons (CODING_ATTR_BASE_NAME (XCAR (tail)), safe_codings);
return Qnil;
ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
charset_list = CODING_ATTR_CHARSET_LIST (attrs);
- translation_table = get_translation_table (attrs, 1);
+ translation_table = get_translation_table (attrs, 1, NULL);
if (NILP (string))
{
{
elt = XCAR (tail);
attrs = AREF (CODING_SYSTEM_SPEC (elt), 0);
- ASET (attrs, coding_attr_trans_tbl, get_translation_table (attrs, 1));
+ ASET (attrs, coding_attr_trans_tbl,
+ get_translation_table (attrs, 1, NULL));
list = Fcons (Fcons (elt, Fcons (attrs, Qnil)), list);
}
(coding_system, prop, val)
Lisp_Object coding_system, prop, val;
{
- Lisp_Object spec, attrs, plist;
+ Lisp_Object spec, attrs;
CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
attrs = AREF (spec, 0);
Qchar_table_extra_slots = intern ("char-table-extra-slots");
DEFSYM (Qtranslation_table, "translation-table");
- Fput (Qtranslation_table, Qchar_table_extra_slots, make_number (1));
+ Fput (Qtranslation_table, Qchar_table_extra_slots, make_number (2));
DEFSYM (Qtranslation_table_id, "translation-table-id");
DEFSYM (Qtranslation_table_for_decode, "translation-table-for-decode");
DEFSYM (Qtranslation_table_for_encode, "translation-table-for-encode");
}
#endif /* emacs */
+
+/* arch-tag: 3a3a2b01-5ff6-4071-9afe-f5b808d9229d
+ (do not change this comment) */