when there's no room in CHARBUF for a decoded character. */
unsigned char *src_base;
/* A buffer to produce decoded characters. */
- int *charbuf = coding->charbuf;
- int *charbuf_end = charbuf + coding->charbuf_size;
+ int *charbuf = coding->charbuf + coding->charbuf_used;
+ int *charbuf_end = coding->charbuf + coding->charbuf_size;
int multibytep = coding->src_multibyte;
while (1)
Lisp_Object Qbig, Qlittle;
Lisp_Object Qcoding_system_history;
Lisp_Object Qvalid_codes;
-Lisp_Object QCcategory;
+Lisp_Object QCcategory, QCmnemonic, QCdefalut_char; /* NOTE(review): "defalut"
+   looks like a misspelling of "default"; a rename would also have to cover
+   every reference outside this view.  */
+Lisp_Object QCdecode_translation_table, QCencode_translation_table;
+Lisp_Object QCpost_read_conversion, QCpre_write_conversion;
extern Lisp_Object Qinsert_file_contents, Qwrite_region;
Lisp_Object Qcall_process, Qcall_process_region, Qprocess_argument;
Lisp_Object Qstart_process, Qopen_network_stream;
Lisp_Object Qtarget_idx;
+Lisp_Object Qinsufficient_source, Qinconsistent_eol, Qinvalid_source;
+Lisp_Object Qinterrupted, Qinsufficient_memory; /* These five symbols are what
+   record_conversion_result stores in Vlast_code_conversion_error, one per
+   CODING_RESULT_* failure code it handles.  */
+
int coding_system_require_warning;
Lisp_Object Vselect_safe_coding_system_function;
Lisp_Object Vcoding_system_for_write;
/* Coding-system actually used in the latest I/O. */
Lisp_Object Vlast_coding_system_used;
-
+/* Set to non-nil when an error is detected during code conversion. */
+Lisp_Object Vlast_code_conversion_error;
/* A vector of length 256 which contains information about special
Latin codes (especially for dealing with Microsoft codes). */
Lisp_Object Vlatin_extra_code_table;
/* Char table for translating Quail and self-inserting input. */
Lisp_Object Vtranslation_table_for_input;
-/* Global flag to tell that we can't call post-read-conversion and
- pre-write-conversion functions. Usually the value is zero, but it
- is set to 1 temporarily while such functions are running. This is
- to avoid infinite recursive call. */
-static int inhibit_pre_post_conversion;
-
/* Two special coding systems. */
Lisp_Object Vsjis_coding_system;
Lisp_Object Vbig5_coding_system;
-
-static int detect_coding_utf_8 P_ ((struct coding_system *,
- struct coding_detection_info *info));
-static void decode_coding_utf_8 P_ ((struct coding_system *));
-static int encode_coding_utf_8 P_ ((struct coding_system *));
-
-static int detect_coding_utf_16 P_ ((struct coding_system *,
- struct coding_detection_info *info));
-static void decode_coding_utf_16 P_ ((struct coding_system *));
-static int encode_coding_utf_16 P_ ((struct coding_system *));
-
-static int detect_coding_iso_2022 P_ ((struct coding_system *,
- struct coding_detection_info *info));
-static void decode_coding_iso_2022 P_ ((struct coding_system *));
-static int encode_coding_iso_2022 P_ ((struct coding_system *));
-
-static int detect_coding_emacs_mule P_ ((struct coding_system *,
- struct coding_detection_info *info));
-static void decode_coding_emacs_mule P_ ((struct coding_system *));
-static int encode_coding_emacs_mule P_ ((struct coding_system *));
-
-static int detect_coding_sjis P_ ((struct coding_system *,
- struct coding_detection_info *info));
-static void decode_coding_sjis P_ ((struct coding_system *));
-static int encode_coding_sjis P_ ((struct coding_system *));
-
-static int detect_coding_big5 P_ ((struct coding_system *,
- struct coding_detection_info *info));
-static void decode_coding_big5 P_ ((struct coding_system *));
-static int encode_coding_big5 P_ ((struct coding_system *));
-
-static int detect_coding_ccl P_ ((struct coding_system *,
- struct coding_detection_info *info));
-static void decode_coding_ccl P_ ((struct coding_system *));
-static int encode_coding_ccl P_ ((struct coding_system *));
-
-static void decode_coding_raw_text P_ ((struct coding_system *));
-static int encode_coding_raw_text P_ ((struct coding_system *));
-
-
/* ISO2022 section */
#define CODING_ISO_INITIAL(coding, reg) \
ISO_control_0, /* Control codes in the range
0x00..0x1F and 0x7F, except for the
following 5 codes. */
- ISO_carriage_return, /* ISO_CODE_CR (0x0D) */
ISO_shift_out, /* ISO_CODE_SO (0x0E) */
ISO_shift_in, /* ISO_CODE_SI (0x0F) */
ISO_single_shift_2_7, /* ISO_CODE_SS2_7 (0x19) */
#define max(a, b) ((a) > (b) ? (a) : (b))
#endif
-#define CODING_GET_INFO(coding, attrs, eol_type, charset_list) \
- do { \
- attrs = CODING_ID_ATTRS (coding->id); \
- eol_type = CODING_ID_EOL_TYPE (coding->id); \
- if (VECTORP (eol_type)) \
- eol_type = Qunix; \
- charset_list = CODING_ATTR_CHARSET_LIST (attrs); \
+#define CODING_GET_INFO(coding, attrs, charset_list) /* unlike the old form, the EOL type is no longer fetched */ \
+ do { \
+ (attrs) = CODING_ID_ATTRS ((coding)->id); /* attributes looked up by CODING's id */ \
+ (charset_list) = CODING_ATTR_CHARSET_LIST (attrs); /* charset list taken from those attributes */ \
} while (0)
/* Safely get one byte from the source text pointed by SRC which ends
at SRC_END, and set C to that byte. If there are not enough bytes
- in the source, it jumps to `no_more_source'. The caller
- should declare and set these variables appropriately in advance:
- src, src_end, multibytep
-*/
+ in the source, it jumps to `no_more_source'. If multibytep is
+ nonzero, and a multibyte character is found at SRC, set C to the
+ negative value of the character code. The caller should declare
+ and set these variables appropriately in advance:
+ src, src_end, multibytep */
-#define ONE_MORE_BYTE(c) \
- do { \
- if (src == src_end) \
- { \
- if (src_base < src) \
- coding->result = CODING_RESULT_INSUFFICIENT_SRC; \
- goto no_more_source; \
- } \
- c = *src++; \
- if (multibytep && (c & 0x80)) \
- { \
- if ((c & 0xFE) != 0xC0) \
- error ("Undecodable char found"); \
- c = ((c & 1) << 6) | *src++; \
- } \
- consumed_chars++; \
+#define ONE_MORE_BYTE(c) \
+ do { \
+ if (src == src_end) \
+ { \
+ if (src_base < src) /* some bytes after src_base were already read */ \
+ record_conversion_result \
+ (coding, CODING_RESULT_INSUFFICIENT_SRC); \
+ goto no_more_source; \
+ } \
+ c = *src++; \
+ if (multibytep && (c & 0x80)) \
+ { \
+ if ((c & 0xFE) == 0xC0) /* lead byte is 0xC0 or 0xC1 */ \
+ c = ((c & 1) << 6) | *src++; /* NOTE(review): trail byte is read with no src_end check -- confirm the input guarantees it */ \
+ else \
+ { \
+ c = - string_char (--src, &src, NULL); /* re-read from the lead byte; C gets the negated character code */ \
+ record_conversion_result \
+ (coding, CODING_RESULT_INVALID_SRC); \
+ } \
+ } \
+ consumed_chars++; /* one per invocation, however many bytes were read */ \
} while (0)
-#define ONE_MORE_BYTE_NO_CHECK(c) \
- do { \
- c = *src++; \
- if (multibytep && (c & 0x80)) \
- { \
- if ((c & 0xFE) != 0xC0) \
- error ("Undecodable char found"); \
- c = ((c & 1) << 6) | *src++; \
- } \
- consumed_chars++; \
+#define ONE_MORE_BYTE_NO_CHECK(c) /* same steps as ONE_MORE_BYTE minus the src == src_end test */ \
+ do { \
+ c = *src++; \
+ if (multibytep && (c & 0x80)) \
+ { \
+ if ((c & 0xFE) == 0xC0) /* lead byte is 0xC0 or 0xC1 */ \
+ c = ((c & 1) << 6) | *src++; /* low bit of the lead byte combined with the next byte */ \
+ else \
+ { \
+ c = - string_char (--src, &src, NULL); /* re-read from the lead byte; C gets the negated character code */ \
+ record_conversion_result \
+ (coding, CODING_RESULT_INVALID_SRC); \
+ } \
+ } \
+ consumed_chars++; /* one per invocation, however many bytes were read */ \
} while (0)
} while (0)
+/* Prototypes for static functions. */
+static void record_conversion_result P_ ((struct coding_system *coding,
+ enum coding_result_code result));
+static int detect_coding_utf_8 P_ ((struct coding_system *,
+ struct coding_detection_info *info));
+static void decode_coding_utf_8 P_ ((struct coding_system *));
+static int encode_coding_utf_8 P_ ((struct coding_system *));
+
+static int detect_coding_utf_16 P_ ((struct coding_system *,
+ struct coding_detection_info *info));
+static void decode_coding_utf_16 P_ ((struct coding_system *));
+static int encode_coding_utf_16 P_ ((struct coding_system *));
+
+static int detect_coding_iso_2022 P_ ((struct coding_system *,
+ struct coding_detection_info *info));
+static void decode_coding_iso_2022 P_ ((struct coding_system *));
+static int encode_coding_iso_2022 P_ ((struct coding_system *));
+
+static int detect_coding_emacs_mule P_ ((struct coding_system *,
+ struct coding_detection_info *info));
+static void decode_coding_emacs_mule P_ ((struct coding_system *));
+static int encode_coding_emacs_mule P_ ((struct coding_system *));
+
+static int detect_coding_sjis P_ ((struct coding_system *,
+ struct coding_detection_info *info));
+static void decode_coding_sjis P_ ((struct coding_system *));
+static int encode_coding_sjis P_ ((struct coding_system *));
+
+static int detect_coding_big5 P_ ((struct coding_system *,
+ struct coding_detection_info *info));
+static void decode_coding_big5 P_ ((struct coding_system *));
+static int encode_coding_big5 P_ ((struct coding_system *));
+
+static int detect_coding_ccl P_ ((struct coding_system *,
+ struct coding_detection_info *info));
+static void decode_coding_ccl P_ ((struct coding_system *));
+static int encode_coding_ccl P_ ((struct coding_system *));
+
+static void decode_coding_raw_text P_ ((struct coding_system *));
+static int encode_coding_raw_text P_ ((struct coding_system *));
+
+static void coding_set_source P_ ((struct coding_system *));
+static void coding_set_destination P_ ((struct coding_system *));
+static void coding_alloc_by_realloc P_ ((struct coding_system *, EMACS_INT));
+static void coding_alloc_by_making_gap P_ ((struct coding_system *,
+ EMACS_INT));
+static unsigned char *alloc_destination P_ ((struct coding_system *,
+ EMACS_INT, unsigned char *));
+static void setup_iso_safe_charsets P_ ((Lisp_Object));
+static unsigned char *encode_designation_at_bol P_ ((struct coding_system *,
+ int *, int *,
+ unsigned char *));
+static int detect_eol P_ ((const unsigned char *,
+ EMACS_INT, enum coding_category));
+static Lisp_Object adjust_coding_eol_type P_ ((struct coding_system *, int));
+static void decode_eol P_ ((struct coding_system *));
+static Lisp_Object get_translation_table P_ ((Lisp_Object, int, int *));
+static Lisp_Object get_translation P_ ((Lisp_Object, int *, int *,
+ int, int *, int *));
+static int produce_chars P_ ((struct coding_system *, Lisp_Object, int));
+static INLINE void produce_composition P_ ((struct coding_system *, int *,
+ EMACS_INT));
+static INLINE void produce_charset P_ ((struct coding_system *, int *,
+ EMACS_INT));
+static void produce_annotation P_ ((struct coding_system *, EMACS_INT));
+static int decode_coding P_ ((struct coding_system *));
+static INLINE int *handle_composition_annotation P_ ((EMACS_INT, EMACS_INT,
+ struct coding_system *,
+ int *, EMACS_INT *));
+static INLINE int *handle_charset_annotation P_ ((EMACS_INT, EMACS_INT,
+ struct coding_system *,
+ int *, EMACS_INT *));
+static void consume_chars P_ ((struct coding_system *, Lisp_Object, int));
+static int encode_coding P_ ((struct coding_system *));
+static Lisp_Object make_conversion_work_buffer P_ ((int));
+static Lisp_Object code_conversion_restore P_ ((Lisp_Object));
+static INLINE int char_encodable_p P_ ((int, Lisp_Object));
+static Lisp_Object make_subsidiaries P_ ((Lisp_Object));
+
+/* Record RESULT as the conversion status of CODING.
+
+   RESULT is always stored in CODING->result.  For the failure codes
+   listed in the switch, Vlast_code_conversion_error is also set to
+   the matching symbol.  Any other code (CODING_RESULT_SUCCESS, for
+   instance) leaves Vlast_code_conversion_error as it was, so an
+   error recorded earlier is not cleared by a later success.  */
+
+static void
+record_conversion_result (struct coding_system *coding,
+			  enum coding_result_code result)
+{
+  coding->result = result;
+  switch (result)
+    {
+    case CODING_RESULT_INSUFFICIENT_SRC:
+      Vlast_code_conversion_error = Qinsufficient_source;
+      break;
+    case CODING_RESULT_INCONSISTENT_EOL:
+      Vlast_code_conversion_error = Qinconsistent_eol;
+      break;
+    case CODING_RESULT_INVALID_SRC:
+      Vlast_code_conversion_error = Qinvalid_source;
+      break;
+    case CODING_RESULT_INTERRUPT:
+      Vlast_code_conversion_error = Qinterrupted;
+      break;
+    case CODING_RESULT_INSUFFICIENT_MEM:
+      Vlast_code_conversion_error = Qinsufficient_memory;
+      break;
+    default:
+      /* Success and every other code: nothing to record.  Spelled
+	 out so that the omission is visibly deliberate.  */
+      break;
+    }
+}
+
#define CODING_DECODE_CHAR(coding, src, src_base, src_end, charset, code, c) \
do { \
charset_map_loaded = 0; \
static unsigned char *
alloc_destination (coding, nbytes, dst)
struct coding_system *coding;
- int nbytes;
+ EMACS_INT nbytes;
unsigned char *dst;
{
EMACS_INT offset = dst - coding->destination;
coding_alloc_by_making_gap (coding, nbytes);
else
coding_alloc_by_realloc (coding, nbytes);
- coding->result = CODING_RESULT_SUCCESS;
+ record_conversion_result (coding, CODING_RESULT_SUCCESS);
coding_set_destination (coding);
dst = coding->destination + offset;
return dst;
/* Maximum length of annotation data (sum of annotations for
composition and charset). */
-#define MAX_ANNOTATION_LENGTH (5 + (MAX_COMPOSITION_COMPONENTS * 2) - 1 + 5)
+#define MAX_ANNOTATION_LENGTH (4 + (MAX_COMPOSITION_COMPONENTS * 2) - 1 + 4)
/* An annotation data is stored in the array coding->charbuf in this
format:
- [ -LENGTH ANNOTATION_MASK FROM TO ... ]
+ [ -LENGTH ANNOTATION_MASK NCHARS ... ]
LENGTH is the number of elements in the annotation.
ANNOTATION_MASK is one of CODING_ANNOTATE_XXX_MASK.
- FROM and TO specify the range of text annotated. They are relative
- to coding->src_pos (on encoding) or coding->dst_pos (on decoding).
+ NCHARS is the number of characters in the text annotated.
The format of the following elements depend on ANNOTATION_MASK.
In the case of CODING_ANNOTATE_CHARSET_MASK, one element CHARSET-ID
follows. */
-#define ADD_ANNOTATION_DATA(buf, len, mask, from, to) \
+/* Store the three leading annotation elements -LEN, MASK and NCHARS
+   at BUF, advancing BUF past them, and set coding->annotated on the
+   `coding' variable in scope at the point of use.  BUF is evaluated
+   several times, so pass a plain pointer lvalue.  The expansion is a
+   single statement with no trailing semicolon; the caller writes it,
+   which keeps the macro safe inside an unbraced if/else.  */
+#define ADD_ANNOTATION_DATA(buf, len, mask, nchars) \
do { \
*(buf)++ = -(len); \
*(buf)++ = (mask); \
- *(buf)++ = (from); \
- *(buf)++ = (to); \
+ *(buf)++ = (nchars); \
coding->annotated = 1; \
-} while (0);
+} while (0)
-#define ADD_COMPOSITION_DATA(buf, from, to, method) \
- do { \
- ADD_ANNOTATION_DATA (buf, 5, CODING_ANNOTATE_COMPOSITION_MASK, from, to); \
- *buf++ = method; \
+/* Store a composition annotation at BUF: the leading elements written
+   by ADD_ANNOTATION_DATA (length 4, CODING_ANNOTATE_COMPOSITION_MASK,
+   NCHARS) followed by METHOD.  BUF is advanced past everything
+   written and is evaluated more than once, so pass a plain pointer
+   lvalue.  Arguments are parenthesized in the expansion so that any
+   expression may be passed for METHOD.  */
+#define ADD_COMPOSITION_DATA(buf, nchars, method) \
+ do { \
+ ADD_ANNOTATION_DATA (buf, 4, CODING_ANNOTATE_COMPOSITION_MASK, nchars); \
+ *(buf)++ = (method); \
} while (0)
-#define ADD_CHARSET_DATA(buf, from, to, id) \
- do { \
- ADD_ANNOTATION_DATA (buf, 5, CODING_ANNOTATE_CHARSET_MASK, from, to); \
- *buf++ = id; \
+/* Store a charset annotation at BUF: the leading elements written by
+   ADD_ANNOTATION_DATA (length 4, CODING_ANNOTATE_CHARSET_MASK, NCHARS)
+   followed by the charset ID.  BUF is advanced past everything
+   written and is evaluated more than once, so pass a plain pointer
+   lvalue.  Arguments are parenthesized in the expansion so that any
+   expression may be passed for ID.  */
+#define ADD_CHARSET_DATA(buf, nchars, id) \
+ do { \
+ ADD_ANNOTATION_DATA (buf, 4, CODING_ANNOTATE_CHARSET_MASK, nchars); \
+ *(buf)++ = (id); \
} while (0)
\f
struct coding_system *coding;
struct coding_detection_info *detect_info;
{
- const unsigned char *src = coding->source, *src_base = src;
+ const unsigned char *src = coding->source, *src_base;
const unsigned char *src_end = coding->source + coding->src_bytes;
int multibytep = coding->src_multibyte;
int consumed_chars = 0;
int found = 0;
- int incomplete;
detect_info->checked |= CATEGORY_MASK_UTF_8;
/* A coding system of this category is always ASCII compatible. */
{
int c, c1, c2, c3, c4;
- incomplete = 0;
+ src_base = src;
ONE_MORE_BYTE (c);
- if (UTF_8_1_OCTET_P (c))
+ if (c < 0 || UTF_8_1_OCTET_P (c))
continue;
- incomplete = 1;
ONE_MORE_BYTE (c1);
- if (! UTF_8_EXTRA_OCTET_P (c1))
+ if (c1 < 0 || ! UTF_8_EXTRA_OCTET_P (c1))
break;
if (UTF_8_2_OCTET_LEADING_P (c))
{
continue;
}
ONE_MORE_BYTE (c2);
- if (! UTF_8_EXTRA_OCTET_P (c2))
+ if (c2 < 0 || ! UTF_8_EXTRA_OCTET_P (c2))
break;
if (UTF_8_3_OCTET_LEADING_P (c))
{
continue;
}
ONE_MORE_BYTE (c3);
- if (! UTF_8_EXTRA_OCTET_P (c3))
+ if (c3 < 0 || ! UTF_8_EXTRA_OCTET_P (c3))
break;
if (UTF_8_4_OCTET_LEADING_P (c))
{
continue;
}
ONE_MORE_BYTE (c4);
- if (! UTF_8_EXTRA_OCTET_P (c4))
+ if (c4 < 0 || ! UTF_8_EXTRA_OCTET_P (c4))
break;
if (UTF_8_5_OCTET_LEADING_P (c))
{
return 0;
no_more_source:
- if (incomplete && coding->mode & CODING_MODE_LAST_BLOCK)
+ if (src_base < src && coding->mode & CODING_MODE_LAST_BLOCK)
{
detect_info->rejected |= CATEGORY_MASK_UTF_8;
return 0;
const unsigned char *src = coding->source + coding->consumed;
const unsigned char *src_end = coding->source + coding->src_bytes;
const unsigned char *src_base;
- int *charbuf = coding->charbuf;
- int *charbuf_end = charbuf + coding->charbuf_size;
+ int *charbuf = coding->charbuf + coding->charbuf_used;
+ int *charbuf_end = coding->charbuf + coding->charbuf_size;
int consumed_chars = 0, consumed_chars_base;
int multibytep = coding->src_multibyte;
- Lisp_Object attr, eol_type, charset_list;
+ Lisp_Object attr, charset_list;
- CODING_GET_INFO (coding, attr, eol_type, charset_list);
+ CODING_GET_INFO (coding, attr, charset_list);
while (1)
{
break;
ONE_MORE_BYTE (c1);
- if (UTF_8_1_OCTET_P(c1))
+ if (c1 < 0)
+ {
+ c = - c1;
+ }
+ else if (UTF_8_1_OCTET_P(c1))
{
c = c1;
- if (c == '\r')
- {
- if (EQ (eol_type, Qdos))
- {
- if (src == src_end)
- {
- coding->result = CODING_RESULT_INSUFFICIENT_SRC;
- goto no_more_source;
- }
- if (*src == '\n')
- ONE_MORE_BYTE (c);
- }
- else if (EQ (eol_type, Qmac))
- c = '\n';
- }
}
else
{
ONE_MORE_BYTE (c2);
- if (! UTF_8_EXTRA_OCTET_P (c2))
+ if (c2 < 0 || ! UTF_8_EXTRA_OCTET_P (c2))
goto invalid_code;
if (UTF_8_2_OCTET_LEADING_P (c1))
{
else
{
ONE_MORE_BYTE (c3);
- if (! UTF_8_EXTRA_OCTET_P (c3))
+ if (c3 < 0 || ! UTF_8_EXTRA_OCTET_P (c3))
goto invalid_code;
if (UTF_8_3_OCTET_LEADING_P (c1))
{
else
{
ONE_MORE_BYTE (c4);
- if (! UTF_8_EXTRA_OCTET_P (c4))
+ if (c4 < 0 || ! UTF_8_EXTRA_OCTET_P (c4))
goto invalid_code;
if (UTF_8_4_OCTET_LEADING_P (c1))
{
else
{
ONE_MORE_BYTE (c5);
- if (! UTF_8_EXTRA_OCTET_P (c5))
+ if (c5 < 0 || ! UTF_8_EXTRA_OCTET_P (c5))
goto invalid_code;
if (UTF_8_5_OCTET_LEADING_P (c1))
{
{
ASSURE_DESTINATION (safe_room);
c = *charbuf++;
- dst += CHAR_STRING (c, dst);
+ if (CHAR_BYTE8_P (c))
+ *dst++ = CHAR_TO_BYTE8 (c);
+ else
+ dst += CHAR_STRING (c, dst);
produced_chars++;
}
}
- coding->result = CODING_RESULT_SUCCESS;
+ record_conversion_result (coding, CODING_RESULT_SUCCESS);
coding->produced_char += produced_chars;
coding->produced = dst - coding->destination;
return 0;
int c1, c2;
detect_info->checked |= CATEGORY_MASK_UTF_16;
-
if (coding->mode & CODING_MODE_LAST_BLOCK
- && (coding->src_bytes & 1))
+ && (coding->src_chars & 1))
{
detect_info->rejected |= CATEGORY_MASK_UTF_16;
return 0;
}
+
ONE_MORE_BYTE (c1);
ONE_MORE_BYTE (c2);
-
if ((c1 == 0xFF) && (c2 == 0xFE))
{
detect_info->found |= (CATEGORY_MASK_UTF_16_LE
| CATEGORY_MASK_UTF_16_AUTO);
- detect_info->rejected |= CATEGORY_MASK_UTF_16_BE;
+ detect_info->rejected |= (CATEGORY_MASK_UTF_16_BE
+ | CATEGORY_MASK_UTF_16_BE_NOSIG
+ | CATEGORY_MASK_UTF_16_LE_NOSIG);
}
else if ((c1 == 0xFE) && (c2 == 0xFF))
{
detect_info->found |= (CATEGORY_MASK_UTF_16_BE
| CATEGORY_MASK_UTF_16_AUTO);
- detect_info->rejected |= CATEGORY_MASK_UTF_16_LE;
+ detect_info->rejected |= (CATEGORY_MASK_UTF_16_LE
+ | CATEGORY_MASK_UTF_16_BE_NOSIG
+ | CATEGORY_MASK_UTF_16_LE_NOSIG);
+ }
+ else if (c1 >= 0 && c2 >= 0)
+ {
+ detect_info->rejected
+ |= (CATEGORY_MASK_UTF_16_BE | CATEGORY_MASK_UTF_16_LE);
}
no_more_source:
return 1;
const unsigned char *src = coding->source + coding->consumed;
const unsigned char *src_end = coding->source + coding->src_bytes;
const unsigned char *src_base;
- int *charbuf = coding->charbuf;
- int *charbuf_end = charbuf + coding->charbuf_size;
+ int *charbuf = coding->charbuf + coding->charbuf_used;
+ int *charbuf_end = coding->charbuf + coding->charbuf_size;
int consumed_chars = 0, consumed_chars_base;
int multibytep = coding->src_multibyte;
enum utf_16_bom_type bom = CODING_UTF_16_BOM (coding);
enum utf_16_endian_type endian = CODING_UTF_16_ENDIAN (coding);
int surrogate = CODING_UTF_16_SURROGATE (coding);
- Lisp_Object attr, eol_type, charset_list;
+ Lisp_Object attr, charset_list;
- CODING_GET_INFO (coding, attr, eol_type, charset_list);
+ CODING_GET_INFO (coding, attr, charset_list);
if (bom == utf_16_with_bom)
{
break;
ONE_MORE_BYTE (c1);
+ if (c1 < 0)
+ {
+ *charbuf++ = -c1;
+ continue;
+ }
ONE_MORE_BYTE (c2);
+ if (c2 < 0)
+ {
+ *charbuf++ = ASCII_BYTE_P (c1) ? c1 : BYTE8_TO_CHAR (c1);
+ *charbuf++ = -c2;
+ continue;
+ }
c = (endian == utf_16_big_endian
? ((c1 << 8) | c2) : ((c2 << 8) | c1));
if (surrogate)
{
c = ((surrogate - 0xD800) << 10) | (c - 0xDC00);
CODING_UTF_16_SURROGATE (coding) = surrogate = 0;
- *charbuf++ = c;
+ *charbuf++ = 0x10000 + c;
}
}
else
enum utf_16_bom_type bom = CODING_UTF_16_BOM (coding);
int big_endian = CODING_UTF_16_ENDIAN (coding) == utf_16_big_endian;
int produced_chars = 0;
- Lisp_Object attrs, eol_type, charset_list;
+ Lisp_Object attrs, charset_list;
int c;
- CODING_GET_INFO (coding, attrs, eol_type, charset_list);
+ CODING_GET_INFO (coding, attrs, charset_list);
if (bom != utf_16_without_bom)
{
EMIT_FOUR_BYTES (c1 & 0xFF, c1 >> 8, c2 & 0xFF, c2 >> 8);
}
}
- coding->result = CODING_RESULT_SUCCESS;
+ record_conversion_result (coding, CODING_RESULT_SUCCESS);
coding->produced = dst - coding->destination;
coding->produced_char += produced_chars;
return 0;
int
emacs_mule_char (coding, src, nbytes, nchars, id)
struct coding_system *coding;
- unsigned char *src;
+ const unsigned char *src;
int *nbytes, *nchars, *id;
{
const unsigned char *src_end = coding->source + coding->src_bytes;
int consumed_chars = 0;
ONE_MORE_BYTE (c);
- switch (emacs_mule_bytes[c])
+ if (c < 0)
{
- case 2:
- if (! (charset = emacs_mule_charset[c]))
- goto invalid_code;
- ONE_MORE_BYTE (c);
- code = c & 0x7F;
- break;
-
- case 3:
- if (c == EMACS_MULE_LEADING_CODE_PRIVATE_11
- || c == EMACS_MULE_LEADING_CODE_PRIVATE_12)
+ c = -c;
+ charset = emacs_mule_charset[0];
+ }
+ else
+ {
+ switch (emacs_mule_bytes[c])
{
- ONE_MORE_BYTE (c);
+ case 2:
if (! (charset = emacs_mule_charset[c]))
goto invalid_code;
ONE_MORE_BYTE (c);
+ if (c < 0xA0)
+ goto invalid_code;
code = c & 0x7F;
- }
- else
- {
- if (! (charset = emacs_mule_charset[c]))
+ break;
+
+ case 3:
+ if (c == EMACS_MULE_LEADING_CODE_PRIVATE_11
+ || c == EMACS_MULE_LEADING_CODE_PRIVATE_12)
+ {
+ ONE_MORE_BYTE (c);
+ if (c < 0xA0 || ! (charset = emacs_mule_charset[c]))
+ goto invalid_code;
+ ONE_MORE_BYTE (c);
+ if (c < 0xA0)
+ goto invalid_code;
+ code = c & 0x7F;
+ }
+ else
+ {
+ if (! (charset = emacs_mule_charset[c]))
+ goto invalid_code;
+ ONE_MORE_BYTE (c);
+ if (c < 0xA0)
+ goto invalid_code;
+ code = (c & 0x7F) << 8;
+ ONE_MORE_BYTE (c);
+ if (c < 0xA0)
+ goto invalid_code;
+ code |= c & 0x7F;
+ }
+ break;
+
+ case 4:
+ ONE_MORE_BYTE (c);
+ if (c < 0 || ! (charset = emacs_mule_charset[c]))
goto invalid_code;
ONE_MORE_BYTE (c);
+ if (c < 0xA0)
+ goto invalid_code;
code = (c & 0x7F) << 8;
ONE_MORE_BYTE (c);
+ if (c < 0xA0)
+ goto invalid_code;
code |= c & 0x7F;
- }
- break;
-
- case 4:
- ONE_MORE_BYTE (c);
- if (! (charset = emacs_mule_charset[c]))
- goto invalid_code;
- ONE_MORE_BYTE (c);
- code = (c & 0x7F) << 8;
- ONE_MORE_BYTE (c);
- code |= c & 0x7F;
- break;
+ break;
- case 1:
- code = c;
- charset = CHARSET_FROM_ID (ASCII_BYTE_P (code)
- ? charset_ascii : charset_eight_bit);
- break;
+ case 1:
+ code = c;
+ charset = CHARSET_FROM_ID (ASCII_BYTE_P (code)
+ ? charset_ascii : charset_eight_bit);
+ break;
- default:
- abort ();
+ default:
+ abort ();
+ }
+ c = DECODE_CHAR (charset, code);
+ if (c < 0)
+ goto invalid_code;
}
- c = DECODE_CHAR (charset, code);
- if (c < 0)
- goto invalid_code;
*nbytes = src - src_base;
*nchars = consumed_chars;
if (id)
struct coding_system *coding;
struct coding_detection_info *detect_info;
{
- const unsigned char *src = coding->source, *src_base = src;
+ const unsigned char *src = coding->source, *src_base;
const unsigned char *src_end = coding->source + coding->src_bytes;
int multibytep = coding->src_multibyte;
int consumed_chars = 0;
int c;
int found = 0;
- int incomplete;
detect_info->checked |= CATEGORY_MASK_EMACS_MULE;
/* A coding system of this category is always ASCII compatible. */
while (1)
{
- incomplete = 0;
+ src_base = src;
ONE_MORE_BYTE (c);
- incomplete = 1;
-
+ if (c < 0)
+ continue;
if (c == 0x80)
{
/* Perhaps the start of composite character. We simple skip
}
else
{
- const unsigned char *src_base = src - 1;
+ int more_bytes = emacs_mule_bytes[*src_base] - 1;
- do
+ while (more_bytes > 0)
{
ONE_MORE_BYTE (c);
+ if (c < 0xA0)
+ {
+ src--; /* Unread the last byte. */
+ break;
+ }
+ more_bytes--;
}
- while (c >= 0xA0);
- if (src - src_base != emacs_mule_bytes[*src_base])
+ if (more_bytes != 0)
break;
found = CATEGORY_MASK_EMACS_MULE;
}
return 0;
no_more_source:
- if (incomplete && coding->mode & CODING_MODE_LAST_BLOCK)
+ if (src_base < src && coding->mode & CODING_MODE_LAST_BLOCK)
{
detect_info->rejected |= CATEGORY_MASK_EMACS_MULE;
return 0;
number of characters composed by this composition. */ \
enum composition_method method = c - 0xF2; \
int *charbuf_base = charbuf; \
- int from, to; \
int consumed_chars_limit; \
int nbytes, nchars; \
\
ONE_MORE_BYTE (c); \
+ if (c < 0) \
+ goto invalid_code; \
nbytes = c - 0xA0; \
if (nbytes < 3) \
goto invalid_code; \
ONE_MORE_BYTE (c); \
+ if (c < 0) \
+ goto invalid_code; \
nchars = c - 0xA0; \
- from = coding->produced + char_offset; \
- to = from + nchars; \
- ADD_COMPOSITION_DATA (charbuf, from, to, method); \
+ ADD_COMPOSITION_DATA (charbuf, nchars, method); \
consumed_chars_limit = consumed_chars_base + nbytes; \
if (method != COMPOSITION_RELATIVE) \
{ \
int components[MAX_COMPOSITION_COMPONENTS * 2 - 1]; \
int *buf = components; \
int i, j; \
- int from, to; \
\
src = src_base; \
ONE_MORE_BYTE (c); /* skip 0x80 */ \
DECODE_EMACS_MULE_COMPOSITION_CHAR (buf); \
if (i < 2) \
goto invalid_code; \
- from = coding->produced_char + char_offset; \
- to = from + i; \
- ADD_COMPOSITION_DATA (charbuf, from, to, method); \
+ ADD_COMPOSITION_DATA (charbuf, i, method); \
for (j = 0; j < i; j++) \
*charbuf++ = components[j]; \
} while (0)
int components[MAX_COMPOSITION_COMPONENTS * 2 - 1]; \
int *buf = components; \
int i, j; \
- int from, to; \
\
DECODE_EMACS_MULE_COMPOSITION_CHAR (buf); \
for (i = 0; i < MAX_COMPOSITION_COMPONENTS; i++) \
goto invalid_code; \
if (charbuf + i + (i / 2) + 1 < charbuf_end) \
goto no_more_source; \
- from = coding->produced_char + char_offset; \
- to = from + i; \
- ADD_COMPOSITION_DATA (buf, from, to, method); \
+ ADD_COMPOSITION_DATA (buf, i, method); \
for (j = 0; j < i; j++) \
*charbuf++ = components[j]; \
for (j = 0; j < i; j += 2) \
const unsigned char *src = coding->source + coding->consumed;
const unsigned char *src_end = coding->source + coding->src_bytes;
const unsigned char *src_base;
- int *charbuf = coding->charbuf;
- int *charbuf_end = charbuf + coding->charbuf_size - MAX_ANNOTATION_LENGTH;
+ int *charbuf = coding->charbuf + coding->charbuf_used;
+ int *charbuf_end
+ = coding->charbuf + coding->charbuf_size - MAX_ANNOTATION_LENGTH;
int consumed_chars = 0, consumed_chars_base;
int multibytep = coding->src_multibyte;
- Lisp_Object attrs, eol_type, charset_list;
+ Lisp_Object attrs, charset_list;
int char_offset = coding->produced_char;
int last_offset = char_offset;
int last_id = charset_ascii;
- CODING_GET_INFO (coding, attrs, eol_type, charset_list);
+ CODING_GET_INFO (coding, attrs, charset_list);
while (1)
{
break;
ONE_MORE_BYTE (c);
-
- if (c < 0x80)
+ if (c < 0)
+ {
+ *charbuf++ = -c;
+ char_offset++;
+ }
+ else if (c < 0x80)
{
- if (c == '\r')
- {
- if (EQ (eol_type, Qdos))
- {
- if (src == src_end)
- {
- coding->result = CODING_RESULT_INSUFFICIENT_SRC;
- goto no_more_source;
- }
- if (*src == '\n')
- ONE_MORE_BYTE (c);
- }
- else if (EQ (eol_type, Qmac))
- c = '\n';
- }
*charbuf++ = c;
char_offset++;
}
else if (c == 0x80)
{
ONE_MORE_BYTE (c);
+ if (c < 0)
+ goto invalid_code;
if (c - 0xF2 >= COMPOSITION_RELATIVE
&& c - 0xF2 <= COMPOSITION_WITH_RULE_ALTCHARS)
DECODE_EMACS_MULE_21_COMPOSITION (c);
if (last_id != id)
{
if (last_id != charset_ascii)
- ADD_CHARSET_DATA (charbuf, last_offset, char_offset, last_id);
+ ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
last_id = id;
last_offset = char_offset;
}
no_more_source:
if (last_id != charset_ascii)
- ADD_CHARSET_DATA (charbuf, last_offset, char_offset, last_id);
+ ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
coding->consumed_char += consumed_chars_base;
coding->consumed = src_base - coding->source;
coding->charbuf_used = charbuf - coding->charbuf;
unsigned char *dst_end = coding->destination + coding->dst_bytes;
int safe_room = 8;
int produced_chars = 0;
- Lisp_Object attrs, eol_type, charset_list;
+ Lisp_Object attrs, charset_list;
int c;
int preferred_charset_id = -1;
- CODING_GET_INFO (coding, attrs, eol_type, charset_list);
+ CODING_GET_INFO (coding, attrs, charset_list);
+ if (! EQ (charset_list, Vemacs_mule_charset_list))
+ {
+ CODING_ATTR_CHARSET_LIST (attrs)
+ = charset_list = Vemacs_mule_charset_list;
+ }
while (charbuf < charbuf_end)
{
if (leading_codes[1])
EMIT_ONE_BYTE (leading_codes[1]);
if (dimension == 1)
- EMIT_ONE_BYTE (code);
+ EMIT_ONE_BYTE (code | 0x80);
else
{
+ code |= 0x8080;
EMIT_ONE_BYTE (code >> 8);
EMIT_ONE_BYTE (code & 0xFF);
}
}
}
- coding->result = CODING_RESULT_SUCCESS;
+ record_conversion_result (coding, CODING_RESULT_SUCCESS);
coding->produced_char += produced_chars;
coding->produced = dst - coding->destination;
return 0;
while (rejected != CATEGORY_MASK_ISO)
{
+ src_base = src;
ONE_MORE_BYTE (c);
switch (c)
{
found |= CATEGORY_MASK_ISO_8_ELSE;
goto check_extra_latin;
-
case ISO_CODE_SS2:
case ISO_CODE_SS3:
/* Single shift. */
if (inhibit_iso_escape_detection)
break;
- single_shifting = 1;
+ single_shifting = 0;
rejected |= CATEGORY_MASK_ISO_7BIT;
if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_1])
& CODING_ISO_FLAG_SINGLE_SHIFT)
- found |= CATEGORY_MASK_ISO_8_1;
+ found |= CATEGORY_MASK_ISO_8_1, single_shifting = 1;
if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_2])
& CODING_ISO_FLAG_SINGLE_SHIFT)
- found |= CATEGORY_MASK_ISO_8_2;
+ found |= CATEGORY_MASK_ISO_8_2, single_shifting = 1;
+ if (single_shifting)
+ break;
goto check_extra_latin;
default:
+ if (c < 0)
+ continue;
if (c < 0x80)
{
single_shifting = 0;
found |= CATEGORY_MASK_ISO_8_1;
else
rejected |= CATEGORY_MASK_ISO_8_1;
- if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_2])
- & CODING_ISO_FLAG_LATIN_EXTRA)
- found |= CATEGORY_MASK_ISO_8_2;
- else
- rejected |= CATEGORY_MASK_ISO_8_2;
+ rejected |= CATEGORY_MASK_ISO_8_2;
}
}
detect_info->rejected |= CATEGORY_MASK_ISO;
}
-/* Set designation state into CODING. */
+/* Set designation state into CODING. Set CHARS_96 to -1 if the
+ escape sequence should be kept. */
#define DECODE_DESIGNATION(reg, dim, chars_96, final) \
do { \
int id, prev; \
|| !SAFE_CHARSET_P (coding, id)) \
{ \
CODING_ISO_DESIGNATION (coding, reg) = -2; \
- goto invalid_code; \
+ chars_96 = -1; \
+ break; \
} \
prev = CODING_ISO_DESIGNATION (coding, reg); \
if (id == charset_jisx0201_roman) \
designation is ASCII to REG, we should keep this designation \
sequence. */ \
if (prev == -2 && id == charset_ascii) \
- goto invalid_code; \
+ chars_96 = -1; \
} while (0)
: (component_idx + 1) / 2); \
int i; \
int *saved_charbuf = charbuf; \
- int from = char_offset; \
- int to = from + nchars; \
\
- ADD_COMPOSITION_DATA (charbuf, from, to, method); \
+ ADD_COMPOSITION_DATA (charbuf, nchars, method); \
if (method != COMPOSITION_RELATIVE) \
{ \
if (component_len == 0) \
const unsigned char *src = coding->source + coding->consumed;
const unsigned char *src_end = coding->source + coding->src_bytes;
const unsigned char *src_base;
- int *charbuf = coding->charbuf;
+ int *charbuf = coding->charbuf + coding->charbuf_used;
int *charbuf_end
- = charbuf + coding->charbuf_size - 4 - MAX_ANNOTATION_LENGTH;
+ = coding->charbuf + coding->charbuf_size - 4 - MAX_ANNOTATION_LENGTH;
int consumed_chars = 0, consumed_chars_base;
int multibytep = coding->src_multibyte;
/* Charsets invoked to graphic plane 0 and 1 respectively. */
int charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
int charset_id_1 = CODING_ISO_INVOKED_CHARSET (coding, 1);
+ int charset_id_2, charset_id_3;
struct charset *charset;
int c;
/* For handling composition sequence. */
int components[MAX_COMPOSITION_COMPONENTS * 2 + 1];
int component_idx;
int component_len;
- Lisp_Object attrs, eol_type, charset_list;
+ Lisp_Object attrs, charset_list;
int char_offset = coding->produced_char;
int last_offset = char_offset;
int last_id = charset_ascii;
- CODING_GET_INFO (coding, attrs, eol_type, charset_list);
+ CODING_GET_INFO (coding, attrs, charset_list);
setup_iso_safe_charsets (attrs);
while (1)
break;
ONE_MORE_BYTE (c1);
+ if (c1 < 0)
+ goto invalid_code;
/* We produce at most one character. */
switch (iso_code_class [c1])
continue;
}
}
- charset = CHARSET_FROM_ID (charset_id_0);
+ if (charset_id_0 < 0)
+ charset = CHARSET_FROM_ID (charset_ascii);
+ else
+ charset = CHARSET_FROM_ID (charset_id_0);
break;
case ISO_0xA0_or_0xFF:
charset = CHARSET_FROM_ID (charset_id_1);
break;
- case ISO_carriage_return:
- if (c1 == '\r')
- {
- if (EQ (eol_type, Qdos))
- {
- if (src == src_end)
- {
- coding->result = CODING_RESULT_INSUFFICIENT_SRC;
- goto no_more_source;
- }
- if (*src == '\n')
- ONE_MORE_BYTE (c1);
- }
- else if (EQ (eol_type, Qmac))
- c1 = '\n';
- }
- /* fall through */
-
case ISO_control_0:
MAYBE_FINISH_COMPOSITION ();
charset = CHARSET_FROM_ID (charset_ascii);
case '$': /* designation of 2-byte character set */
if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DESIGNATION))
goto invalid_code;
- ONE_MORE_BYTE (c1);
- if (c1 >= '@' && c1 <= 'B')
- { /* designation of JISX0208.1978, GB2312.1980,
+ {
+ int reg, chars96;
+
+ ONE_MORE_BYTE (c1);
+ if (c1 >= '@' && c1 <= 'B')
+ { /* designation of JISX0208.1978, GB2312.1980,
or JISX0208.1980 */
- DECODE_DESIGNATION (0, 2, 0, c1);
- }
- else if (c1 >= 0x28 && c1 <= 0x2B)
- { /* designation of DIMENSION2_CHARS94 character set */
- ONE_MORE_BYTE (c2);
- DECODE_DESIGNATION (c1 - 0x28, 2, 0, c2);
- }
- else if (c1 >= 0x2C && c1 <= 0x2F)
- { /* designation of DIMENSION2_CHARS96 character set */
- ONE_MORE_BYTE (c2);
- DECODE_DESIGNATION (c1 - 0x2C, 2, 1, c2);
- }
- else
- goto invalid_code;
- /* We must update these variables now. */
- charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
- charset_id_1 = CODING_ISO_INVOKED_CHARSET (coding, 1);
+ reg = 0, chars96 = 0;
+ }
+ else if (c1 >= 0x28 && c1 <= 0x2B)
+ { /* designation of DIMENSION2_CHARS94 character set */
+ reg = c1 - 0x28, chars96 = 0;
+ ONE_MORE_BYTE (c1);
+ }
+ else if (c1 >= 0x2C && c1 <= 0x2F)
+ { /* designation of DIMENSION2_CHARS96 character set */
+ reg = c1 - 0x2C, chars96 = 1;
+ ONE_MORE_BYTE (c1);
+ }
+ else
+ goto invalid_code;
+ DECODE_DESIGNATION (reg, 2, chars96, c1);
+ /* We must update these variables now. */
+ if (reg == 0)
+ charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
+ else if (reg == 1)
+ charset_id_1 = CODING_ISO_INVOKED_CHARSET (coding, 1);
+ if (chars96 < 0)
+ goto invalid_code;
+ }
continue;
case 'n': /* invocation of locking-shift-2 */
if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT)
|| CODING_ISO_DESIGNATION (coding, 2) < 0)
goto invalid_code;
- charset = CHARSET_FROM_ID (CODING_ISO_DESIGNATION (coding, 2));
+ charset_id_2 = CODING_ISO_DESIGNATION (coding, 2);
+ if (charset_id_2 < 0)
+ charset = CHARSET_FROM_ID (charset_ascii);
+ else
+ charset = CHARSET_FROM_ID (charset_id_2);
ONE_MORE_BYTE (c1);
if (c1 < 0x20 || (c1 >= 0x80 && c1 < 0xA0))
goto invalid_code;
if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT)
|| CODING_ISO_DESIGNATION (coding, 3) < 0)
goto invalid_code;
- charset = CHARSET_FROM_ID (CODING_ISO_DESIGNATION (coding, 3));
+ charset_id_3 = CODING_ISO_DESIGNATION (coding, 3);
+ if (charset_id_3 < 0)
+ charset = CHARSET_FROM_ID (charset_ascii);
+ else
+ charset = CHARSET_FROM_ID (charset_id_3);
ONE_MORE_BYTE (c1);
if (c1 < 0x20 || (c1 >= 0x80 && c1 < 0xA0))
goto invalid_code;
&& src + 1 < src_end
&& src[0] == '%'
&& src[1] == '@')
- break;
+ {
+ src += 2;
+ break;
+ }
*p++ = ASCII_BYTE_P (c1) ? c1 : BYTE8_TO_CHAR (c1);
}
if (p + 3 > charbuf_end)
default:
if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DESIGNATION))
goto invalid_code;
- if (c1 >= 0x28 && c1 <= 0x2B)
- { /* designation of DIMENSION1_CHARS94 character set */
- ONE_MORE_BYTE (c2);
- DECODE_DESIGNATION (c1 - 0x28, 1, 0, c2);
- }
- else if (c1 >= 0x2C && c1 <= 0x2F)
- { /* designation of DIMENSION1_CHARS96 character set */
- ONE_MORE_BYTE (c2);
- DECODE_DESIGNATION (c1 - 0x2C, 1, 1, c2);
- }
- else
- goto invalid_code;
- /* We must update these variables now. */
- charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
- charset_id_1 = CODING_ISO_INVOKED_CHARSET (coding, 1);
+ {
+ int reg, chars96;
+
+ if (c1 >= 0x28 && c1 <= 0x2B)
+ { /* designation of DIMENSION1_CHARS94 character set */
+ reg = c1 - 0x28, chars96 = 0;
+ ONE_MORE_BYTE (c1);
+ }
+ else if (c1 >= 0x2C && c1 <= 0x2F)
+ { /* designation of DIMENSION1_CHARS96 character set */
+ reg = c1 - 0x2C, chars96 = 1;
+ ONE_MORE_BYTE (c1);
+ }
+ else
+ goto invalid_code;
+ DECODE_DESIGNATION (reg, 1, chars96, c1);
+ /* We must update these variables now. */
+ if (reg == 0)
+ charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
+ else if (reg == 1)
+ charset_id_1 = CODING_ISO_INVOKED_CHARSET (coding, 1);
+ if (chars96 < 0)
+ goto invalid_code;
+ }
continue;
}
}
&& last_id != charset->id)
{
if (last_id != charset_ascii)
- ADD_CHARSET_DATA (charbuf, last_offset, char_offset, last_id);
+ ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
last_id = charset->id;
last_offset = char_offset;
}
src = src_base;
consumed_chars = consumed_chars_base;
ONE_MORE_BYTE (c);
- *charbuf++ = ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
+ *charbuf++ = c < 0 ? -c : ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
char_offset++;
coding->errors++;
continue;
no_more_source:
if (last_id != charset_ascii)
- ADD_CHARSET_DATA (charbuf, last_offset, char_offset, last_id);
+ ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
coding->consumed_char += consumed_chars_base;
coding->consumed = src_base - coding->source;
coding->charbuf_used = charbuf - coding->charbuf;
int c;
int preferred_charset_id = -1;
- CODING_GET_INFO (coding, attrs, eol_type, charset_list);
+ CODING_GET_INFO (coding, attrs, charset_list);
+ eol_type = CODING_ID_EOL_TYPE (coding->id);
+ if (VECTORP (eol_type))
+ eol_type = Qunix;
+
setup_iso_safe_charsets (attrs);
/* Charset list may have been changed. */
charset_list = CODING_ATTR_CHARSET_LIST (attrs); \
ASSURE_DESTINATION (safe_room);
ENCODE_RESET_PLANE_AND_REGISTER ();
}
- coding->result = CODING_RESULT_SUCCESS;
+ record_conversion_result (coding, CODING_RESULT_SUCCESS);
CODING_ISO_BOL (coding) = bol_designation;
coding->produced_char += produced_chars;
coding->produced = dst - coding->destination;
struct coding_system *coding;
struct coding_detection_info *detect_info;
{
- const unsigned char *src = coding->source, *src_base = src;
+ const unsigned char *src = coding->source, *src_base;
const unsigned char *src_end = coding->source + coding->src_bytes;
int multibytep = coding->src_multibyte;
int consumed_chars = 0;
int found = 0;
int c;
- int incomplete;
detect_info->checked |= CATEGORY_MASK_SJIS;
/* A coding system of this category is always ASCII compatible. */
while (1)
{
- incomplete = 0;
+ src_base = src;
ONE_MORE_BYTE (c);
- incomplete = 1;
if (c < 0x80)
continue;
if ((c >= 0x81 && c <= 0x9F) || (c >= 0xE0 && c <= 0xEF))
return 0;
no_more_source:
- if (incomplete && coding->mode & CODING_MODE_LAST_BLOCK)
+ if (src_base < src && coding->mode & CODING_MODE_LAST_BLOCK)
{
detect_info->rejected |= CATEGORY_MASK_SJIS;
return 0;
struct coding_system *coding;
struct coding_detection_info *detect_info;
{
- const unsigned char *src = coding->source, *src_base = src;
+ const unsigned char *src = coding->source, *src_base;
const unsigned char *src_end = coding->source + coding->src_bytes;
int multibytep = coding->src_multibyte;
int consumed_chars = 0;
int found = 0;
int c;
- int incomplete;
detect_info->checked |= CATEGORY_MASK_BIG5;
/* A coding system of this category is always ASCII compatible. */
while (1)
{
- incomplete = 0;
+ src_base = src;
ONE_MORE_BYTE (c);
- incomplete = 1;
if (c < 0x80)
continue;
if (c >= 0xA1)
return 0;
no_more_source:
- if (incomplete && coding->mode & CODING_MODE_LAST_BLOCK)
+ if (src_base < src && coding->mode & CODING_MODE_LAST_BLOCK)
{
detect_info->rejected |= CATEGORY_MASK_BIG5;
return 0;
const unsigned char *src = coding->source + coding->consumed;
const unsigned char *src_end = coding->source + coding->src_bytes;
const unsigned char *src_base;
- int *charbuf = coding->charbuf;
- int *charbuf_end = charbuf + coding->charbuf_size - MAX_ANNOTATION_LENGTH;
+ int *charbuf = coding->charbuf + coding->charbuf_used;
+ int *charbuf_end
+ = coding->charbuf + coding->charbuf_size - MAX_ANNOTATION_LENGTH;
int consumed_chars = 0, consumed_chars_base;
int multibytep = coding->src_multibyte;
struct charset *charset_roman, *charset_kanji, *charset_kana;
- Lisp_Object attrs, eol_type, charset_list, val;
+ struct charset *charset_kanji2;
+ Lisp_Object attrs, charset_list, val;
int char_offset = coding->produced_char;
int last_offset = char_offset;
int last_id = charset_ascii;
- CODING_GET_INFO (coding, attrs, eol_type, charset_list);
+ CODING_GET_INFO (coding, attrs, charset_list);
val = charset_list;
charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
charset_kana = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
- charset_kanji = CHARSET_FROM_ID (XINT (XCAR (val)));
+ charset_kanji = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
+ charset_kanji2 = NILP (val) ? NULL : CHARSET_FROM_ID (XINT (XCAR (val)));
while (1)
{
int c, c1;
+ struct charset *charset;
src_base = src;
consumed_chars_base = consumed_chars;
break;
ONE_MORE_BYTE (c);
-
- if (c == '\r')
+ if (c < 0)
+ goto invalid_code;
+ if (c < 0x80)
+ charset = charset_roman;
+ else if (c == 0x80 || c == 0xA0)
+ goto invalid_code;
+ else if (c >= 0xA1 && c <= 0xDF)
{
- if (EQ (eol_type, Qdos))
- {
- if (src == src_end)
- {
- coding->result = CODING_RESULT_INSUFFICIENT_SRC;
- goto no_more_source;
- }
- if (*src == '\n')
- ONE_MORE_BYTE (c);
- }
- else if (EQ (eol_type, Qmac))
- c = '\n';
+ /* SJIS -> JISX0201-Kana */
+ c &= 0x7F;
+ charset = charset_kana;
+ }
+ else if (c <= 0xEF)
+ {
+ /* SJIS -> JISX0208 */
+ ONE_MORE_BYTE (c1);
+ if (c1 < 0x40 || c1 == 0x7F || c1 > 0xFC)
+ goto invalid_code;
+ c = (c << 8) | c1;
+ SJIS_TO_JIS (c);
+ charset = charset_kanji;
+ }
+ else if (c <= 0xFC && charset_kanji2)
+ {
+ /* SJIS -> JISX0213-2 */
+ ONE_MORE_BYTE (c1);
+ if (c1 < 0x40 || c1 == 0x7F || c1 > 0xFC)
+ goto invalid_code;
+ c = (c << 8) | c1;
+ SJIS_TO_JIS2 (c);
+ charset = charset_kanji2;
}
else
+ goto invalid_code;
+ if (charset->id != charset_ascii
+ && last_id != charset->id)
{
- struct charset *charset;
-
- if (c < 0x80)
- charset = charset_roman;
- else
- {
- if (c >= 0xF0)
- goto invalid_code;
- if (c < 0xA0 || c >= 0xE0)
- {
- /* SJIS -> JISX0208 */
- ONE_MORE_BYTE (c1);
- if (c1 < 0x40 || c1 == 0x7F || c1 > 0xFC)
- goto invalid_code;
- c = (c << 8) | c1;
- SJIS_TO_JIS (c);
- charset = charset_kanji;
- }
- else if (c > 0xA0)
- {
- /* SJIS -> JISX0201-Kana */
- c &= 0x7F;
- charset = charset_kana;
- }
- else
- goto invalid_code;
- }
- if (charset->id != charset_ascii
- && last_id != charset->id)
- {
- if (last_id != charset_ascii)
- ADD_CHARSET_DATA (charbuf, last_offset, char_offset, last_id);
- last_id = charset->id;
- last_offset = char_offset;
- }
- CODING_DECODE_CHAR (coding, src, src_base, src_end, charset, c, c);
+ if (last_id != charset_ascii)
+ ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
+ last_id = charset->id;
+ last_offset = char_offset;
}
+ CODING_DECODE_CHAR (coding, src, src_base, src_end, charset, c, c);
*charbuf++ = c;
char_offset++;
continue;
src = src_base;
consumed_chars = consumed_chars_base;
ONE_MORE_BYTE (c);
- *charbuf++ = ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
+ *charbuf++ = c < 0 ? -c : BYTE8_TO_CHAR (c);
char_offset++;
coding->errors++;
}
no_more_source:
if (last_id != charset_ascii)
- ADD_CHARSET_DATA (charbuf, last_offset, char_offset, last_id);
+ ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
coding->consumed_char += consumed_chars_base;
coding->consumed = src_base - coding->source;
coding->charbuf_used = charbuf - coding->charbuf;
const unsigned char *src = coding->source + coding->consumed;
const unsigned char *src_end = coding->source + coding->src_bytes;
const unsigned char *src_base;
- int *charbuf = coding->charbuf;
- int *charbuf_end = charbuf + coding->charbuf_size - MAX_ANNOTATION_LENGTH;
+ int *charbuf = coding->charbuf + coding->charbuf_used;
+ int *charbuf_end
+ = coding->charbuf + coding->charbuf_size - MAX_ANNOTATION_LENGTH;
int consumed_chars = 0, consumed_chars_base;
int multibytep = coding->src_multibyte;
struct charset *charset_roman, *charset_big5;
- Lisp_Object attrs, eol_type, charset_list, val;
+ Lisp_Object attrs, charset_list, val;
int char_offset = coding->produced_char;
int last_offset = char_offset;
int last_id = charset_ascii;
- CODING_GET_INFO (coding, attrs, eol_type, charset_list);
+ CODING_GET_INFO (coding, attrs, charset_list);
val = charset_list;
charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
charset_big5 = CHARSET_FROM_ID (XINT (XCAR (val)));
while (1)
{
int c, c1;
+ struct charset *charset;
src_base = src;
consumed_chars_base = consumed_chars;
ONE_MORE_BYTE (c);
- if (c == '\r')
+ if (c < 0)
+ goto invalid_code;
+ if (c < 0x80)
+ charset = charset_roman;
+ else
{
- if (EQ (eol_type, Qdos))
- {
- if (src == src_end)
- {
- coding->result = CODING_RESULT_INSUFFICIENT_SRC;
- goto no_more_source;
- }
- if (*src == '\n')
- ONE_MORE_BYTE (c);
- }
- else if (EQ (eol_type, Qmac))
- c = '\n';
+ /* BIG5 -> Big5 */
+ if (c < 0xA1 || c > 0xFE)
+ goto invalid_code;
+ ONE_MORE_BYTE (c1);
+ if (c1 < 0x40 || (c1 > 0x7E && c1 < 0xA1) || c1 > 0xFE)
+ goto invalid_code;
+ c = c << 8 | c1;
+ charset = charset_big5;
}
- else
+ if (charset->id != charset_ascii
+ && last_id != charset->id)
{
- struct charset *charset;
- if (c < 0x80)
- charset = charset_roman;
- else
- {
- /* BIG5 -> Big5 */
- if (c < 0xA1 || c > 0xFE)
- goto invalid_code;
- ONE_MORE_BYTE (c1);
- if (c1 < 0x40 || (c1 > 0x7E && c1 < 0xA1) || c1 > 0xFE)
- goto invalid_code;
- c = c << 8 | c1;
- charset = charset_big5;
- }
- if (charset->id != charset_ascii
- && last_id != charset->id)
- {
- if (last_id != charset_ascii)
- ADD_CHARSET_DATA (charbuf, last_offset, char_offset, last_id);
- last_id = charset->id;
- last_offset = char_offset;
- }
- CODING_DECODE_CHAR (coding, src, src_base, src_end, charset, c, c);
+ if (last_id != charset_ascii)
+ ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
+ last_id = charset->id;
+ last_offset = char_offset;
}
-
+ CODING_DECODE_CHAR (coding, src, src_base, src_end, charset, c, c);
*charbuf++ = c;
char_offset++;
continue;
src = src_base;
consumed_chars = consumed_chars_base;
ONE_MORE_BYTE (c);
- *charbuf++ = ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
+ *charbuf++ = c < 0 ? -c : BYTE8_TO_CHAR (c);
char_offset++;
coding->errors++;
}
no_more_source:
if (last_id != charset_ascii)
- ADD_CHARSET_DATA (charbuf, last_offset, char_offset, last_id);
+ ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
coding->consumed_char += consumed_chars_base;
coding->consumed = src_base - coding->source;
coding->charbuf_used = charbuf - coding->charbuf;
unsigned char *dst_end = coding->destination + coding->dst_bytes;
int safe_room = 4;
int produced_chars = 0;
- Lisp_Object attrs, eol_type, charset_list, val;
+ Lisp_Object attrs, charset_list, val;
int ascii_compatible;
struct charset *charset_roman, *charset_kanji, *charset_kana;
+ struct charset *charset_kanji2;
int c;
- CODING_GET_INFO (coding, attrs, eol_type, charset_list);
+ CODING_GET_INFO (coding, attrs, charset_list);
val = charset_list;
charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
charset_kana = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
- charset_kanji = CHARSET_FROM_ID (XINT (XCAR (val)));
+ charset_kanji = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
+ charset_kanji2 = NILP (val) ? NULL : CHARSET_FROM_ID (XINT (XCAR (val)));
ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
}
else if (charset == charset_kana)
EMIT_ONE_BYTE (code | 0x80);
+ else if (charset_kanji2 && charset == charset_kanji2)
+ {
+ int c1, c2;
+
+ c1 = code >> 8;
+ if (c1 == 0x21 || (c1 >= 0x23 && c1 < 0x25)
+ || (c1 >= 0x2C && c1 <= 0x2F) || c1 >= 0x6E)
+ {
+ JIS_TO_SJIS2 (code);
+ c1 = code >> 8, c2 = code & 0xFF;
+ EMIT_TWO_BYTES (c1, c2);
+ }
+ else
+ EMIT_ONE_ASCII_BYTE (code & 0x7F);
+ }
else
EMIT_ONE_ASCII_BYTE (code & 0x7F);
}
}
- coding->result = CODING_RESULT_SUCCESS;
+ record_conversion_result (coding, CODING_RESULT_SUCCESS);
coding->produced_char += produced_chars;
coding->produced = dst - coding->destination;
return 0;
unsigned char *dst_end = coding->destination + coding->dst_bytes;
int safe_room = 4;
int produced_chars = 0;
- Lisp_Object attrs, eol_type, charset_list, val;
+ Lisp_Object attrs, charset_list, val;
int ascii_compatible;
struct charset *charset_roman, *charset_big5;
int c;
- CODING_GET_INFO (coding, attrs, eol_type, charset_list);
+ CODING_GET_INFO (coding, attrs, charset_list);
val = charset_list;
charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
charset_big5 = CHARSET_FROM_ID (XINT (XCAR (val)));
EMIT_ONE_ASCII_BYTE (code & 0x7F);
}
}
- coding->result = CODING_RESULT_SUCCESS;
+ record_conversion_result (coding, CODING_RESULT_SUCCESS);
coding->produced_char += produced_chars;
coding->produced = dst - coding->destination;
return 0;
struct coding_system *coding;
struct coding_detection_info *detect_info;
{
- const unsigned char *src = coding->source, *src_base = src;
+ const unsigned char *src = coding->source, *src_base;
const unsigned char *src_end = coding->source + coding->src_bytes;
int multibytep = coding->src_multibyte;
int consumed_chars = 0;
int found = 0;
- unsigned char *valids = CODING_CCL_VALIDS (coding);
+ unsigned char *valids;
int head_ascii = coding->head_ascii;
Lisp_Object attrs;
detect_info->checked |= CATEGORY_MASK_CCL;
coding = &coding_categories[coding_category_ccl];
+ valids = CODING_CCL_VALIDS (coding);
attrs = CODING_ID_ATTRS (coding->id);
if (! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
src += head_ascii;
while (1)
{
int c;
+
+ src_base = src;
ONE_MORE_BYTE (c);
- if (! valids[c])
+ if (c < 0 || ! valids[c])
break;
if ((valids[c] > 1))
found = CATEGORY_MASK_CCL;
{
const unsigned char *src = coding->source + coding->consumed;
const unsigned char *src_end = coding->source + coding->src_bytes;
- int *charbuf = coding->charbuf;
- int *charbuf_end = charbuf + coding->charbuf_size;
+ int *charbuf = coding->charbuf + coding->charbuf_used;
+ int *charbuf_end = coding->charbuf + coding->charbuf_size;
int consumed_chars = 0;
int multibytep = coding->src_multibyte;
struct ccl_program ccl;
int source_charbuf[1024];
int source_byteidx[1024];
- Lisp_Object attrs, eol_type, charset_list;
+ Lisp_Object attrs, charset_list;
- CODING_GET_INFO (coding, attrs, eol_type, charset_list);
+ CODING_GET_INFO (coding, attrs, charset_list);
setup_ccl_program (&ccl, CODING_CCL_DECODER (coding));
while (src < src_end)
switch (ccl.status)
{
case CCL_STAT_SUSPEND_BY_SRC:
- coding->result = CODING_RESULT_INSUFFICIENT_SRC;
+ record_conversion_result (coding, CODING_RESULT_INSUFFICIENT_SRC);
break;
case CCL_STAT_SUSPEND_BY_DST:
break;
case CCL_STAT_QUIT:
case CCL_STAT_INVALID_CMD:
- coding->result = CODING_RESULT_INTERRUPT;
+ record_conversion_result (coding, CODING_RESULT_INTERRUPT);
break;
default:
- coding->result = CODING_RESULT_SUCCESS;
+ record_conversion_result (coding, CODING_RESULT_SUCCESS);
break;
}
coding->consumed_char += consumed_chars;
unsigned char *adjusted_dst_end = dst_end - 1;
int destination_charbuf[1024];
int i, produced_chars = 0;
- Lisp_Object attrs, eol_type, charset_list;
+ Lisp_Object attrs, charset_list;
- CODING_GET_INFO (coding, attrs, eol_type, charset_list);
+ CODING_GET_INFO (coding, attrs, charset_list);
setup_ccl_program (&ccl, CODING_CCL_ENCODER (coding));
ccl.last_block = coding->mode & CODING_MODE_LAST_BLOCK;
switch (ccl.status)
{
case CCL_STAT_SUSPEND_BY_SRC:
- coding->result = CODING_RESULT_INSUFFICIENT_SRC;
+ record_conversion_result (coding, CODING_RESULT_INSUFFICIENT_SRC);
break;
case CCL_STAT_SUSPEND_BY_DST:
- coding->result = CODING_RESULT_INSUFFICIENT_DST;
+ record_conversion_result (coding, CODING_RESULT_INSUFFICIENT_DST);
break;
case CCL_STAT_QUIT:
case CCL_STAT_INVALID_CMD:
- coding->result = CODING_RESULT_INTERRUPT;
+ record_conversion_result (coding, CODING_RESULT_INTERRUPT);
break;
default:
- coding->result = CODING_RESULT_SUCCESS;
+ record_conversion_result (coding, CODING_RESULT_SUCCESS);
break;
}
coding->chars_at_source = 1;
coding->consumed_char = 0;
coding->consumed = 0;
- coding->result = CODING_RESULT_SUCCESS;
+ record_conversion_result (coding, CODING_RESULT_SUCCESS);
}
static int
produced_chars = dst - (coding->destination + coding->dst_bytes);
}
}
- coding->result = CODING_RESULT_SUCCESS;
+ record_conversion_result (coding, CODING_RESULT_SUCCESS);
coding->produced_char += produced_chars;
coding->produced = dst - coding->destination;
return 0;
struct coding_system *coding;
struct coding_detection_info *detect_info;
{
- const unsigned char *src = coding->source, *src_base = src;
+ const unsigned char *src = coding->source, *src_base;
const unsigned char *src_end = coding->source + coding->src_bytes;
int multibytep = coding->src_multibyte;
int consumed_chars = 0;
{
int c;
+ src_base = src;
ONE_MORE_BYTE (c);
+ if (c < 0)
+ continue;
if (NILP (AREF (valids, c)))
break;
if (c >= 0x80)
const unsigned char *src = coding->source + coding->consumed;
const unsigned char *src_end = coding->source + coding->src_bytes;
const unsigned char *src_base;
- int *charbuf = coding->charbuf;
- int *charbuf_end = charbuf + coding->charbuf_size - MAX_ANNOTATION_LENGTH;
+ int *charbuf = coding->charbuf + coding->charbuf_used;
+ int *charbuf_end
+ = coding->charbuf + coding->charbuf_size - MAX_ANNOTATION_LENGTH;
int consumed_chars = 0, consumed_chars_base;
int multibytep = coding->src_multibyte;
- Lisp_Object attrs, eol_type, charset_list, valids;
+ Lisp_Object attrs, charset_list, valids;
int char_offset = coding->produced_char;
int last_offset = char_offset;
int last_id = charset_ascii;
- CODING_GET_INFO (coding, attrs, eol_type, charset_list);
+ CODING_GET_INFO (coding, attrs, charset_list);
valids = AREF (attrs, coding_attr_charset_valids);
while (1)
{
int c;
+ Lisp_Object val;
+ struct charset *charset;
+ int dim;
+ int len = 1;
+ unsigned code;
src_base = src;
consumed_chars_base = consumed_chars;
break;
ONE_MORE_BYTE (c);
- if (c == '\r')
+ if (c < 0)
+ goto invalid_code;
+ code = c;
+
+ val = AREF (valids, c);
+ if (NILP (val))
+ goto invalid_code;
+ if (INTEGERP (val))
{
- /* Here we assume that no charset maps '\r' to something
- else. */
- if (EQ (eol_type, Qdos))
+ charset = CHARSET_FROM_ID (XFASTINT (val));
+ dim = CHARSET_DIMENSION (charset);
+ while (len < dim)
{
- if (src == src_end)
- {
- coding->result = CODING_RESULT_INSUFFICIENT_SRC;
- goto no_more_source;
- }
- if (*src == '\n')
- ONE_MORE_BYTE (c);
+ ONE_MORE_BYTE (c);
+ code = (code << 8) | c;
+ len++;
}
- else if (EQ (eol_type, Qmac))
- c = '\n';
+ CODING_DECODE_CHAR (coding, src, src_base, src_end,
+ charset, code, c);
}
else
{
- Lisp_Object val;
- struct charset *charset;
- int dim;
- int len = 1;
- unsigned code = c;
-
- val = AREF (valids, c);
- if (NILP (val))
- goto invalid_code;
- if (INTEGERP (val))
+ /* VAL is a list of charset IDs. It is assured that the
+ list is sorted by charset dimensions (smaller one
+ comes first). */
+ while (CONSP (val))
{
- charset = CHARSET_FROM_ID (XFASTINT (val));
+ charset = CHARSET_FROM_ID (XFASTINT (XCAR (val)));
dim = CHARSET_DIMENSION (charset);
while (len < dim)
{
code = (code << 8) | c;
len++;
}
- CODING_DECODE_CHAR (coding, src, src_base, src_end,
- charset, code, c);
- }
- else
- {
- /* VAL is a list of charset IDs. It is assured that the
- list is sorted by charset dimensions (smaller one
- comes first). */
- while (CONSP (val))
- {
- charset = CHARSET_FROM_ID (XFASTINT (XCAR (val)));
- dim = CHARSET_DIMENSION (charset);
- while (len < dim)
- {
- ONE_MORE_BYTE (c);
- code = (code << 8) | c;
- len++;
- }
- CODING_DECODE_CHAR (coding, src, src_base,
- src_end, charset, code, c);
- if (c >= 0)
- break;
- val = XCDR (val);
- }
- }
- if (c < 0)
- goto invalid_code;
- if (charset->id != charset_ascii
- && last_id != charset->id)
- {
- if (last_id != charset_ascii)
- ADD_CHARSET_DATA (charbuf, last_offset, char_offset, last_id);
- last_id = charset->id;
- last_offset = char_offset;
+ CODING_DECODE_CHAR (coding, src, src_base,
+ src_end, charset, code, c);
+ if (c >= 0)
+ break;
+ val = XCDR (val);
}
}
+ if (c < 0)
+ goto invalid_code;
+ if (charset->id != charset_ascii
+ && last_id != charset->id)
+ {
+ if (last_id != charset_ascii)
+ ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
+ last_id = charset->id;
+ last_offset = char_offset;
+ }
+
*charbuf++ = c;
char_offset++;
continue;
src = src_base;
consumed_chars = consumed_chars_base;
ONE_MORE_BYTE (c);
- *charbuf++ = ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
+ *charbuf++ = c < 0 ? -c : ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
char_offset++;
coding->errors++;
}
no_more_source:
if (last_id != charset_ascii)
- ADD_CHARSET_DATA (charbuf, last_offset, char_offset, last_id);
+ ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
coding->consumed_char += consumed_chars_base;
coding->consumed = src_base - coding->source;
coding->charbuf_used = charbuf - coding->charbuf;
unsigned char *dst_end = coding->destination + coding->dst_bytes;
int safe_room = MAX_MULTIBYTE_LENGTH;
int produced_chars = 0;
- Lisp_Object attrs, eol_type, charset_list;
+ Lisp_Object attrs, charset_list;
int ascii_compatible;
int c;
- CODING_GET_INFO (coding, attrs, eol_type, charset_list);
+ CODING_GET_INFO (coding, attrs, charset_list);
ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
while (charbuf < charbuf_end)
}
}
- coding->result = CODING_RESULT_SUCCESS;
+ record_conversion_result (coding, CODING_RESULT_SUCCESS);
coding->produced_char += produced_chars;
coding->produced = dst - coding->destination;
return 0;
coding->detector = NULL;
coding->decoder = decode_coding_raw_text;
coding->encoder = encode_coding_raw_text;
+ if (! EQ (eol_type, Qunix))
+ {
+ coding->common_flags |= CODING_REQUIRE_DECODING_MASK;
+ if (! VECTORP (eol_type))
+ coding->common_flags |= CODING_REQUIRE_ENCODING_MASK;
+ }
+
}
return;
coding_inherit_eol_type (coding_system, parent)
Lisp_Object coding_system, parent;
{
- Lisp_Object spec, attrs, eol_type;
+ Lisp_Object spec, eol_type;
if (NILP (coding_system))
coding_system = Qraw_text;
spec = CODING_SYSTEM_SPEC (coding_system);
- attrs = AREF (spec, 0);
eol_type = AREF (spec, 2);
if (VECTORP (eol_type)
&& ! NILP (parent))
o coding-category-utf-8
The category for a coding system which has the same code range
- as UTF-8 (cf. RFC2279). Assigned the coding-system (Lisp
+ as UTF-8 (cf. RFC3629). Assigned the coding-system (Lisp
symbol) `utf-8' by default.
o coding-category-utf-16-be
static int
detect_eol (source, src_bytes, category)
- unsigned char *source;
+ const unsigned char *source;
EMACS_INT src_bytes;
enum coding_category category;
{
- unsigned char *src = source, *src_end = src + src_bytes;
+ const unsigned char *src = source, *src_end = src + src_bytes;
unsigned char c;
int total = 0;
int eol_seen = EOL_SEEN_NONE;
}
-static void
+static Lisp_Object
adjust_coding_eol_type (coding, eol_seen)
struct coding_system *coding;
int eol_seen;
eol_type = CODING_ID_EOL_TYPE (coding->id);
if (eol_seen & EOL_SEEN_LF)
- coding->id = CODING_SYSTEM_ID (AREF (eol_type, 0));
+ {
+ coding->id = CODING_SYSTEM_ID (AREF (eol_type, 0));
+ eol_type = Qunix;
+ }
else if (eol_seen & EOL_SEEN_CRLF)
- coding->id = CODING_SYSTEM_ID (AREF (eol_type, 1));
+ {
+ coding->id = CODING_SYSTEM_ID (AREF (eol_type, 1));
+ eol_type = Qdos;
+ }
else if (eol_seen & EOL_SEEN_CR)
- coding->id = CODING_SYSTEM_ID (AREF (eol_type, 2));
+ {
+ coding->id = CODING_SYSTEM_ID (AREF (eol_type, 2));
+ eol_type = Qmac;
+ }
+ return eol_type;
}
/* Detect how a text specified in CODING is encoded. If a coding
struct coding_system *coding;
{
const unsigned char *src, *src_end;
- Lisp_Object attrs, coding_type;
coding->consumed = coding->consumed_char = 0;
coding->produced = coding->produced_char = 0;
if (EQ (CODING_ATTR_TYPE (CODING_ID_ATTRS (coding->id)), Qundecided))
{
int c, i;
+ struct coding_detection_info detect_info;
- for (src = coding->source; src < src_end; src++)
+ detect_info.checked = detect_info.found = detect_info.rejected = 0;
+ for (i = 0, src = coding->source; src < src_end; i++, src++)
{
c = *src;
- if (c & 0x80 || (c < 0x20 && (c == ISO_CODE_ESC
- || c == ISO_CODE_SI
- || c == ISO_CODE_SO)))
+ if (c & 0x80)
break;
+ if (c < 0x20
+ && (c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO)
+ && ! inhibit_iso_escape_detection
+ && ! detect_info.checked)
+ {
+ coding->head_ascii = src - (coding->source + coding->consumed);
+ if (detect_coding_iso_2022 (coding, &detect_info))
+ {
+ /* We have scanned the whole data. */
+ if (! (detect_info.rejected & CATEGORY_MASK_ISO_7_ELSE))
+ /* We didn't find an 8-bit code. */
+ src = src_end;
+ break;
+ }
+ }
}
coding->head_ascii = src - (coding->source + coding->consumed);
- if (coding->head_ascii < coding->src_bytes)
+ if (coding->head_ascii < coding->src_bytes
+ || detect_info.found)
{
- struct coding_detection_info detect_info;
enum coding_category category;
struct coding_system *this;
- detect_info.checked = detect_info.found = detect_info.rejected = 0;
- for (i = 0; i < coding_category_raw_text; i++)
- {
- category = coding_priorities[i];
- this = coding_categories + category;
- if (this->id < 0)
- {
- /* No coding system of this category is defined. */
- detect_info.rejected |= (1 << category);
- }
- else if (category >= coding_category_raw_text)
- continue;
- else if (detect_info.checked & (1 << category))
- {
- if (detect_info.found & (1 << category))
+ if (coding->head_ascii == coding->src_bytes)
+ /* As all bytes are 7-bit, we can ignore non-ISO-2022 codings. */
+ for (i = 0; i < coding_category_raw_text; i++)
+ {
+ category = coding_priorities[i];
+ this = coding_categories + category;
+ if (detect_info.found & (1 << category))
+ break;
+ }
+ else
+ for (i = 0; i < coding_category_raw_text; i++)
+ {
+ category = coding_priorities[i];
+ this = coding_categories + category;
+ if (this->id < 0)
+ {
+ /* No coding system of this category is defined. */
+ detect_info.rejected |= (1 << category);
+ }
+ else if (category >= coding_category_raw_text)
+ continue;
+ else if (detect_info.checked & (1 << category))
+ {
+ if (detect_info.found & (1 << category))
+ break;
+ }
+ else if ((*(this->detector)) (coding, &detect_info)
+ && detect_info.found & (1 << category))
+ {
+ if (category == coding_category_utf_16_auto)
+ {
+ if (detect_info.found & CATEGORY_MASK_UTF_16_LE)
+ category = coding_category_utf_16_le;
+ else
+ category = coding_category_utf_16_be;
+ }
break;
- }
- else if ((*(this->detector)) (coding, &detect_info)
- && detect_info.found & (1 << category))
- break;
- }
+ }
+ }
+
if (i < coding_category_raw_text)
setup_coding_system (CODING_ID_NAME (this->id), coding);
else if (detect_info.rejected == CATEGORY_MASK_ANY)
}
}
}
- else if (EQ (CODING_ATTR_TYPE (CODING_ID_ATTRS (coding->id)), Qutf_16))
+ else if (XINT (CODING_ATTR_CATEGORY (CODING_ID_ATTRS (coding->id)))
+ == coding_category_utf_16_auto)
{
Lisp_Object coding_systems;
struct coding_detection_info detect_info;
= AREF (CODING_ID_ATTRS (coding->id), coding_attr_utf_16_bom);
detect_info.found = detect_info.rejected = 0;
if (CONSP (coding_systems)
- && detect_coding_utf_16 (coding, &detect_info)
- && (detect_info.found & (CATEGORY_MASK_UTF_16_LE
- | CATEGORY_MASK_UTF_16_BE)))
+ && detect_coding_utf_16 (coding, &detect_info))
{
if (detect_info.found & CATEGORY_MASK_UTF_16_LE)
setup_coding_system (XCAR (coding_systems), coding);
- else
+ else if (detect_info.found & CATEGORY_MASK_UTF_16_BE)
setup_coding_system (XCDR (coding_systems), coding);
}
}
+}
- attrs = CODING_ID_ATTRS (coding->id);
- coding_type = CODING_ATTR_TYPE (attrs);
-
- /* If we have not yet decided the EOL type, detect it now. But, the
- detection is impossible for a CCL based coding system, in which
- case, we detct the EOL type after decoding. */
- if (VECTORP (CODING_ID_EOL_TYPE (coding->id))
- && ! EQ (coding_type, Qccl))
- {
- int eol_seen = detect_eol (coding->source, coding->src_bytes,
- XINT (CODING_ATTR_CATEGORY (attrs)));
- if (eol_seen != EOL_SEEN_NONE)
- adjust_coding_eol_type (coding, eol_seen);
- }
-}
+static void
+decode_eol (coding)
+ struct coding_system *coding;
+{
+ Lisp_Object eol_type;
+ unsigned char *p, *pbeg, *pend;
+
+ eol_type = CODING_ID_EOL_TYPE (coding->id);
+ if (EQ (eol_type, Qunix))
+ return;
+ if (NILP (coding->dst_object))
+ pbeg = coding->destination;
+ else
+ pbeg = BYTE_POS_ADDR (coding->dst_pos_byte);
+ pend = pbeg + coding->produced;
-static void
-decode_eol (coding)
- struct coding_system *coding;
-{
- if (VECTORP (CODING_ID_EOL_TYPE (coding->id)))
+ if (VECTORP (eol_type))
{
- unsigned char *p = CHAR_POS_ADDR (coding->dst_pos);
- unsigned char *pend = p + coding->produced;
int eol_seen = EOL_SEEN_NONE;
- for (; p < pend; p++)
+ for (p = pbeg; p < pend; p++)
{
if (*p == '\n')
eol_seen |= EOL_SEEN_LF;
eol_seen |= EOL_SEEN_CR;
}
}
+ if (eol_seen != EOL_SEEN_NONE
+ && eol_seen != EOL_SEEN_LF
+ && eol_seen != EOL_SEEN_CRLF
+ && eol_seen != EOL_SEEN_CR)
+ eol_seen = EOL_SEEN_LF;
if (eol_seen != EOL_SEEN_NONE)
- adjust_coding_eol_type (coding, eol_seen);
+ eol_type = adjust_coding_eol_type (coding, eol_seen);
}
- if (EQ (CODING_ID_EOL_TYPE (coding->id), Qmac))
+ if (EQ (eol_type, Qmac))
{
- unsigned char *p = CHAR_POS_ADDR (coding->dst_pos);
- unsigned char *pend = p + coding->produced;
-
- for (; p < pend; p++)
+ for (p = pbeg; p < pend; p++)
if (*p == '\r')
*p = '\n';
}
- else if (EQ (CODING_ID_EOL_TYPE (coding->id), Qdos))
+ else if (EQ (eol_type, Qdos))
{
- unsigned char *p, *pbeg, *pend;
- Lisp_Object undo_list;
-
- move_gap_both (coding->dst_pos + coding->produced_char,
- coding->dst_pos_byte + coding->produced);
- undo_list = current_buffer->undo_list;
- current_buffer->undo_list = Qt;
- del_range_2 (coding->dst_pos, coding->dst_pos_byte, GPT, GPT_BYTE, 0);
- current_buffer->undo_list = undo_list;
- pbeg = GPT_ADDR;
- pend = pbeg + coding->produced;
+ int n = 0;
- for (p = pend - 1; p >= pbeg; p--)
- if (*p == '\r')
- {
- safe_bcopy ((char *) (p + 1), (char *) p, pend - p - 1);
- pend--;
- }
- coding->produced_char -= coding->produced - (pend - pbeg);
- coding->produced = pend - pbeg;
- insert_from_gap (coding->produced_char, coding->produced);
+ if (NILP (coding->dst_object))
+ {
+ for (p = pend - 2; p >= pbeg; p--)
+ if (*p == '\r')
+ {
+ safe_bcopy ((char *) (p + 1), (char *) p, pend-- - p - 1);
+ n++;
+ }
+ }
+ else
+ {
+ for (p = pend - 2; p >= pbeg; p--)
+ if (*p == '\r')
+ {
+ int pos_byte = coding->dst_pos_byte + (p - pbeg);
+ int pos = BYTE_TO_CHAR (pos_byte);
+
+ del_range_2 (pos, pos_byte, pos + 1, pos_byte + 1, 0);
+ n++;
+ }
+ }
+ coding->produced -= n;
+ coding->produced_char -= n;
}
}
-static void
-translate_chars (coding, table)
- struct coding_system *coding;
- Lisp_Object table;
+
+/* Return a translation table (or list of them) from coding system
+ attribute vector ATTRS for encoding (ENCODEP is nonzero) or
+ decoding (ENCODEP is zero).
+
+ If ATTRS specifies no table, the standard translation table for
+ that direction is returned as is. Otherwise a symbol, either
+ alone or as an element of a list, is replaced by the value of its
+ Qtranslation_table property (a list is copied first), and, if the
+ standard table is a char-table, it is appended as the last
+ element, so that the result is then a list.
+
+ If MAX_LOOKUP is not NULL, *MAX_LOOKUP is set to the largest
+ natural number found in the second extra slot of the char-tables
+ in the result, or to 1 if no such slot holds a larger value. */
+
+static Lisp_Object
+get_translation_table (attrs, encodep, max_lookup)
+ Lisp_Object attrs;
+ int encodep, *max_lookup;
{
- int *charbuf = coding->charbuf;
- int *charbuf_end = charbuf + coding->charbuf_used;
- int c;
+ Lisp_Object standard, translation_table;
+ Lisp_Object val;
- if (coding->chars_at_source)
- return;
+ if (encodep)
+ translation_table = CODING_ATTR_ENCODE_TBL (attrs),
+ standard = Vstandard_translation_table_for_encode;
+ else
+ translation_table = CODING_ATTR_DECODE_TBL (attrs),
+ standard = Vstandard_translation_table_for_decode;
+ if (NILP (translation_table))
+ translation_table = standard; /* Nothing specific: fall back to the standard table. */
+ else
+ {
+ if (SYMBOLP (translation_table))
+ translation_table = Fget (translation_table, Qtranslation_table);
+ else if (CONSP (translation_table))
+ {
+ translation_table = Fcopy_sequence (translation_table); /* Work on a copy; elements are replaced below. */
+ for (val = translation_table; CONSP (val); val = XCDR (val))
+ if (SYMBOLP (XCAR (val)))
+ XSETCAR (val, Fget (XCAR (val), Qtranslation_table));
+ }
+ if (CHAR_TABLE_P (standard)) /* Chain the standard table after the specific one(s). */
+ {
+ if (CONSP (translation_table))
+ translation_table = nconc2 (translation_table,
+ Fcons (standard, Qnil));
+ else
+ translation_table = Fcons (translation_table,
+ Fcons (standard, Qnil));
+ }
+ }
- while (charbuf < charbuf_end)
+ if (max_lookup)
{
- c = *charbuf;
- if (c < 0)
- charbuf += c;
- else
- *charbuf++ = translate_char (table, c);
+ *max_lookup = 1; /* Lower bound; raised below from the tables' extra slots. */
+ if (CHAR_TABLE_P (translation_table)
+ && CHAR_TABLE_EXTRA_SLOTS (XCHAR_TABLE (translation_table)) > 1)
+ {
+ val = XCHAR_TABLE (translation_table)->extras[1];
+ if (NATNUMP (val) && *max_lookup < XFASTINT (val))
+ *max_lookup = XFASTINT (val);
+ }
+ else if (CONSP (translation_table))
+ {
+ Lisp_Object tail, val; /* NOTE(review): this VAL shadows the function-level VAL. */
+
+ for (tail = translation_table; CONSP (tail); tail = XCDR (tail))
+ if (CHAR_TABLE_P (XCAR (tail))
+ && CHAR_TABLE_EXTRA_SLOTS (XCHAR_TABLE (XCAR (tail))) > 1)
+ {
+ val = XCHAR_TABLE (XCAR (tail))->extras[1];
+ if (NATNUMP (val) && *max_lookup < XFASTINT (val))
+ *max_lookup = XFASTINT (val);
+ }
+ }
+ }
+ return translation_table;
+}
+/* Look up character C in TABLE, which is a char-table or a list of
+ char-tables, and set TRANS to the result. Whenever the entry found
+ for C is itself a character, C is replaced by that character and
+ TRANS is reset to Qnil. With a list, each char-table element is
+ consulted in turn using the current value of C, and the scan stops
+ at the first entry that is neither nil nor a character; that entry
+ is left in TRANS. TRANS is Qnil if TABLE is neither a char-table
+ nor a cons. C and TRANS are assigned to, so both must be lvalues;
+ TABLE and C are evaluated more than once. */
+#define LOOKUP_TRANSLATION_TABLE(table, c, trans) \
+ do { \
+ trans = Qnil; \
+ if (CHAR_TABLE_P (table)) \
+ { \
+ trans = CHAR_TABLE_REF (table, c); \
+ if (CHARACTERP (trans)) \
+ c = XFASTINT (trans), trans = Qnil; \
+ } \
+ else if (CONSP (table)) \
+ { \
+ Lisp_Object tail; \
+ \
+ for (tail = table; CONSP (tail); tail = XCDR (tail)) \
+ if (CHAR_TABLE_P (XCAR (tail))) \
+ { \
+ trans = CHAR_TABLE_REF (XCAR (tail), c); \
+ if (CHARACTERP (trans)) \
+ c = XFASTINT (trans), trans = Qnil; \
+ else if (! NILP (trans)) \
+ break; \
+ } \
+ } \
+ } while (0)
+
+/* Pick from VAL the translation that applies to the characters that
+ start at BUF and end before BUF_END. The format of VAL is
+ described at the top of the function body.
+
+ If VAL is a list, its elements are tried in order and the first
+ one whose FROM vector equals the leading characters of BUF is
+ used; *FROM_NCHARS is then set to the length of that vector. If
+ BUF runs out while a FROM vector has matched so far, return Qt
+ when LAST_BLOCK is zero, and treat it as a mismatch otherwise.
+ Return Qnil if no element matches.
+
+ On success, store the first (or only) TO character in *BUF, set
+ *TO_NCHARS to the length of TO if TO is a vector, and return TO.
+ *FROM_NCHARS and *TO_NCHARS are not written in the other cases, so
+ callers must initialize them beforehand. */
+static Lisp_Object
+get_translation (val, buf, buf_end, last_block, from_nchars, to_nchars)
+ Lisp_Object val;
+ int *buf, *buf_end;
+ int last_block;
+ int *from_nchars, *to_nchars;
+{
+ /* VAL is TO or (([FROM-CHAR ...] . TO) ...) where TO is TO-CHAR or
+ [TO-CHAR ...]. In the list form, each FROM vector is compared
+ element by element with the characters at BUF. */
+ if (CONSP (val))
+ {
+ Lisp_Object from, tail;
+ int i, len;
+
+ for (tail = val; CONSP (tail); tail = XCDR (tail))
+ {
+ val = XCAR (tail);
+ from = XCAR (val);
+ len = ASIZE (from);
+ for (i = 0; i < len; i++)
+ {
+ if (buf + i == buf_end) /* Ran out of characters before FROM was fully compared. */
+ {
+ if (! last_block)
+ return Qt;
+ break;
+ }
+ if (XINT (AREF (from, i)) != buf[i])
+ break;
+ }
+ if (i == len) /* Every element of FROM matched. */
+ {
+ val = XCDR (val);
+ *from_nchars = len;
+ break;
+ }
+ }
+ if (! CONSP (tail)) /* The list was exhausted without a match. */
+ return Qnil;
}
+ if (VECTORP (val))
+ *buf = XINT (AREF (val, 0)), *to_nchars = ASIZE (val);
+ else
+ *buf = XINT (val);
+ return val;
}
+
static int
-produce_chars (coding)
+produce_chars (coding, translation_table, last_block)
struct coding_system *coding;
+ Lisp_Object translation_table;
+ int last_block;
{
unsigned char *dst = coding->destination + coding->produced;
unsigned char *dst_end = coding->destination + coding->dst_bytes;
int produced;
int produced_chars = 0;
+ int carryover = 0;
if (! coding->chars_at_source)
{
/* Characters are in coding->charbuf. */
int *buf = coding->charbuf;
int *buf_end = buf + coding->charbuf_used;
- unsigned char *adjusted_dst_end;
if (BUFFERP (coding->src_object)
&& EQ (coding->src_object, coding->dst_object))
dst_end = ((unsigned char *) coding->source) + coding->consumed;
- adjusted_dst_end = dst_end - MAX_MULTIBYTE_LENGTH;
while (buf < buf_end)
{
- int c = *buf++;
+ int c = *buf, i;
- if (dst >= adjusted_dst_end)
- {
- dst = alloc_destination (coding,
- buf_end - buf + MAX_MULTIBYTE_LENGTH,
- dst);
- dst_end = coding->destination + coding->dst_bytes;
- adjusted_dst_end = dst_end - MAX_MULTIBYTE_LENGTH;
- }
if (c >= 0)
{
- if (coding->dst_multibyte
- || ! CHAR_BYTE8_P (c))
- CHAR_STRING_ADVANCE (c, dst);
- else
- *dst++ = CHAR_TO_BYTE8 (c);
- produced_chars++;
+ int from_nchars = 1, to_nchars = 1;
+ Lisp_Object trans = Qnil;
+
+ LOOKUP_TRANSLATION_TABLE (translation_table, c, trans);
+ if (! NILP (trans))
+ {
+ trans = get_translation (trans, buf, buf_end, last_block,
+ &from_nchars, &to_nchars);
+ if (EQ (trans, Qt))
+ break;
+ c = *buf;
+ }
+
+ if (dst + MAX_MULTIBYTE_LENGTH * to_nchars > dst_end)
+ {
+ dst = alloc_destination (coding,
+ buf_end - buf
+ + MAX_MULTIBYTE_LENGTH * to_nchars,
+ dst);
+ dst_end = coding->destination + coding->dst_bytes;
+ }
+
+ for (i = 0; i < to_nchars; i++)
+ {
+ if (i > 0)
+ c = XINT (AREF (trans, i));
+ if (coding->dst_multibyte
+ || ! CHAR_BYTE8_P (c))
+ CHAR_STRING_ADVANCE (c, dst);
+ else
+ *dst++ = CHAR_TO_BYTE8 (c);
+ }
+ produced_chars += to_nchars;
+ *buf++ = to_nchars;
+ while (--from_nchars > 0)
+ *buf++ = 0;
}
else
- /* This is an annotation datum. (-C) is the length of
- it. */
- buf += -c - 1;
+ /* This is an annotation datum. (-C) is the length. */
+ buf += -c;
}
+ carryover = buf_end - buf;
}
else
{
{
if (src == src_end)
{
- coding->result = CODING_RESULT_INSUFFICIENT_SRC;
+ record_conversion_result
+ (coding, CODING_RESULT_INSUFFICIENT_SRC);
goto no_more_source;
}
if (*src == '\n')
insert_from_gap (produced_chars, produced);
coding->produced += produced;
coding->produced_char += produced_chars;
- return produced_chars;
+ return carryover;
}
/* Compose text in CODING->object according to the annotation data at
*/
static INLINE void
-produce_composition (coding, charbuf)
+produce_composition (coding, charbuf, pos)
struct coding_system *coding;
int *charbuf;
+ EMACS_INT pos;
{
+ /* CHARBUF[0] is the negated length of the annotation, CHARBUF[2] is
+ the number of characters to compose starting at POS, CHARBUF[3]
+ is the composition method, and the components (if any) start at
+ CHARBUF[4]. Malformed annotation data is silently ignored. */
int len;
- EMACS_INT from, to;
+ EMACS_INT to;
enum composition_method method;
Lisp_Object components;
len = -charbuf[0];
- from = coding->dst_pos + charbuf[2];
- to = coding->dst_pos + charbuf[3];
- method = (enum composition_method) (charbuf[4]);
+ to = pos + charbuf[2];
+ if (to <= pos)
+ return;
+ method = (enum composition_method) (charbuf[3]);
if (method == COMPOSITION_RELATIVE)
components = Qnil;
- else
+ else if (method >= COMPOSITION_WITH_RULE
+ && method <= COMPOSITION_WITH_RULE_ALTCHARS)
{
Lisp_Object args[MAX_COMPOSITION_COMPONENTS * 2 - 1];
int i;
- len -= 5;
- charbuf += 5;
+ len -= 4;
+ charbuf += 4;
+ /* Never write past the end of ARGS. */
+ if (len > MAX_COMPOSITION_COMPONENTS * 2 - 1)
+ return;
for (i = 0; i < len; i++)
- args[i] = make_number (charbuf[i]);
+ {
+ /* Test the raw integer, not the Lisp object made from it;
+ a negative component means the data is broken. */
+ if (charbuf[i] < 0)
+ return;
+ args[i] = make_number (charbuf[i]);
+ }
components = (method == COMPOSITION_WITH_ALTCHARS
? Fstring (len, args) : Fvector (len, args));
}
- compose_text (from, to, components, Qnil, coding->dst_object);
+ else
+ return;
+ compose_text (pos, to, components, Qnil, coding->dst_object);
}
/* Put `charset' property on text in CODING->object according to
the annotation data at CHARBUF. CHARBUF is an array:
- [ -LENGTH ANNOTATION_MASK FROM TO CHARSET-ID ]
+ [ -LENGTH ANNOTATION_MASK NCHARS CHARSET-ID ]
*/
static INLINE void
-produce_charset (coding, charbuf)
+produce_charset (coding, charbuf, pos)
struct coding_system *coding;
int *charbuf;
+ EMACS_INT pos; /* End of the range that receives the property. */
{
- EMACS_INT from = coding->dst_pos + charbuf[2];
- EMACS_INT to = coding->dst_pos + charbuf[3];
- struct charset *charset = CHARSET_FROM_ID (charbuf[4]);
+ EMACS_INT from = pos - charbuf[2]; /* CHARBUF[2] is NCHARS, so the range is the NCHARS characters before POS. */
+ struct charset *charset = CHARSET_FROM_ID (charbuf[3]); /* CHARBUF[3] is CHARSET-ID. */
- Fput_text_property (make_number (from), make_number (to),
+ Fput_text_property (make_number (from), make_number (pos),
Qcharset, CHARSET_NAME (charset),
coding->dst_object);
}
} \
if (! coding->charbuf) \
{ \
- coding->result = CODING_RESULT_INSUFFICIENT_MEM; \
+ record_conversion_result (coding, CODING_RESULT_INSUFFICIENT_MEM); \
return coding->result; \
} \
coding->charbuf_size = size; \
static void
-produce_annotation (coding)
+produce_annotation (coding, pos)
struct coding_system *coding;
+ EMACS_INT pos;
{
int *charbuf = coding->charbuf;
int *charbuf_end = charbuf + coding->charbuf_used;
while (charbuf < charbuf_end)
{
if (*charbuf >= 0)
- charbuf++;
+ pos += *charbuf++;
else
{
int len = -*charbuf;
switch (charbuf[1])
{
case CODING_ANNOTATE_COMPOSITION_MASK:
- produce_composition (coding, charbuf);
+ produce_composition (coding, charbuf, pos);
break;
case CODING_ANNOTATE_CHARSET_MASK:
- produce_charset (coding, charbuf);
+ produce_charset (coding, charbuf, pos);
break;
default:
abort ();
struct coding_system *coding;
{
Lisp_Object attrs;
+ Lisp_Object undo_list;
+ Lisp_Object translation_table;
+ int carryover;
+ int i;
if (BUFFERP (coding->src_object)
&& coding->src_pos > 0
&& coding->src_pos + coding->src_chars > GPT)
move_gap_both (coding->src_pos, coding->src_pos_byte);
+ undo_list = Qt;
if (BUFFERP (coding->dst_object))
{
if (current_buffer != XBUFFER (coding->dst_object))
set_buffer_internal (XBUFFER (coding->dst_object));
if (GPT != PT)
move_gap_both (PT, PT_BYTE);
+ undo_list = current_buffer->undo_list;
+ current_buffer->undo_list = Qt;
}
coding->consumed = coding->consumed_char = 0;
coding->produced = coding->produced_char = 0;
coding->chars_at_source = 0;
- coding->result = CODING_RESULT_SUCCESS;
+ record_conversion_result (coding, CODING_RESULT_SUCCESS);
coding->errors = 0;
ALLOC_CONVERSION_WORK_AREA (coding);
attrs = CODING_ID_ATTRS (coding->id);
+ translation_table = get_translation_table (attrs, 0, NULL);
+ carryover = 0;
do
{
+ EMACS_INT pos = coding->dst_pos + coding->produced_char;
+
coding_set_source (coding);
coding->annotated = 0;
+ coding->charbuf_used = carryover;
(*(coding->decoder)) (coding);
- if (!NILP (CODING_ATTR_DECODE_TBL (attrs)))
- translate_chars (coding, CODING_ATTR_DECODE_TBL (attrs));
- else if (!NILP (Vstandard_translation_table_for_decode))
- translate_chars (coding, Vstandard_translation_table_for_decode);
coding_set_destination (coding);
- produce_chars (coding);
+ carryover = produce_chars (coding, translation_table, 0);
if (coding->annotated)
- produce_annotation (coding);
+ produce_annotation (coding, pos);
+ for (i = 0; i < carryover; i++)
+ coding->charbuf[i]
+ = coding->charbuf[coding->charbuf_used - carryover + i];
}
while (coding->consumed < coding->src_bytes
&& ! coding->result);
- if (EQ (CODING_ATTR_TYPE (CODING_ID_ATTRS (coding->id)), Qccl)
- && SYMBOLP (CODING_ID_EOL_TYPE (coding->id))
- && ! EQ (CODING_ID_EOL_TYPE (coding->id), Qunix))
- decode_eol (coding);
+ if (carryover > 0)
+ {
+ coding_set_destination (coding);
+ coding->charbuf_used = carryover;
+ produce_chars (coding, translation_table, 1);
+ }
coding->carryover_bytes = 0;
if (coding->consumed < coding->src_bytes)
/* Flush out unprocessed data as binary chars. We are sure
that the number of data is less than the size of
coding->charbuf. */
+ coding->charbuf_used = 0;
while (nbytes-- > 0)
{
int c = *src++;
coding->charbuf[coding->charbuf_used++] = (c & 0x80 ? - c : c);
}
- produce_chars (coding);
+ produce_chars (coding, Qnil, 1);
}
else
{
coding->consumed = coding->src_bytes;
}
+ if (BUFFERP (coding->dst_object))
+ {
+ current_buffer->undo_list = undo_list;
+ record_insert (coding->dst_pos, coding->produced_char);
+ }
+ if (! EQ (CODING_ID_EOL_TYPE (coding->id), Qunix))
+ decode_eol (coding);
return coding->result;
}
enum composition_method method = COMPOSITION_METHOD (prop);
int nchars = COMPOSITION_LENGTH (prop);
- ADD_COMPOSITION_DATA (buf, 0, nchars, method);
+ ADD_COMPOSITION_DATA (buf, nchars, method);
if (method != COMPOSITION_RELATIVE)
{
Lisp_Object components;
id = XINT (CHARSET_SYMBOL_ID (val));
else
id = -1;
- ADD_CHARSET_DATA (buf, 0, 0, id);
+ ADD_CHARSET_DATA (buf, 0, id);
next = Fnext_single_property_change (make_number (pos), Qcharset,
coding->src_object,
make_number (limit));
static void
-consume_chars (coding)
+consume_chars (coding, translation_table, max_lookup)
struct coding_system *coding;
+ Lisp_Object translation_table;
+ int max_lookup;
{
int *buf = coding->charbuf;
int *buf_end = coding->charbuf + coding->charbuf_size;
Lisp_Object eol_type;
int c;
EMACS_INT stop, stop_composition, stop_charset;
+ int *lookup_buf = NULL;
+
+ if (! NILP (translation_table))
+ lookup_buf = alloca (sizeof (int) * max_lookup);
eol_type = CODING_ID_EOL_TYPE (coding->id);
if (VECTORP (eol_type))
/* Note: composition handling is not yet implemented. */
coding->common_flags &= ~CODING_ANNOTATE_COMPOSITION_MASK;
- if (coding->common_flags & CODING_ANNOTATE_COMPOSITION_MASK)
- stop = stop_composition = pos;
- else
- stop = stop_composition = end_pos;
- if (coding->common_flags & CODING_ANNOTATE_CHARSET_MASK)
- stop = stop_charset = pos;
+ if (NILP (coding->src_object))
+ stop = stop_composition = stop_charset = end_pos;
else
- stop_charset = end_pos;
+ {
+ if (coding->common_flags & CODING_ANNOTATE_COMPOSITION_MASK)
+ stop = stop_composition = pos;
+ else
+ stop = stop_composition = end_pos;
+ if (coding->common_flags & CODING_ANNOTATE_CHARSET_MASK)
+ stop = stop_charset = pos;
+ else
+ stop_charset = end_pos;
+ }
- /* Compensate for CRLF and annotation. */
+ /* Compensate for CRLF and conversion. */
buf_end -= 1 + MAX_ANNOTATION_LENGTH;
while (buf < buf_end)
{
+ Lisp_Object trans;
+
if (pos == stop)
{
if (pos == end_pos)
{
EMACS_INT bytes;
- if (! CODING_FOR_UNIBYTE (coding)
- && (bytes = MULTIBYTE_LENGTH (src, src_end)) > 0)
+ if (coding->encoder == encode_coding_raw_text)
+ c = *src++, pos++;
+ else if ((bytes = MULTIBYTE_LENGTH (src, src_end)) > 0)
c = STRING_CHAR_ADVANCE (src), pos += bytes;
else
- c = *src++, pos++;
+ c = BYTE8_TO_CHAR (*src), src++, pos++;
}
else
c = STRING_CHAR_ADVANCE (src), pos++;
c = '\r';
}
}
- *buf++ = c;
+
+ trans = Qnil;
+ LOOKUP_TRANSLATION_TABLE (translation_table, c, trans);
+ if (NILP (trans))
+ *buf++ = c;
+ else
+ {
+ int from_nchars = 1, to_nchars = 1;
+ int *lookup_buf_end;
+ const unsigned char *p = src;
+ int i;
+
+ lookup_buf[0] = c;
+ for (i = 1; i < max_lookup && p < src_end; i++)
+ lookup_buf[i] = STRING_CHAR_ADVANCE (p);
+ lookup_buf_end = lookup_buf + i;
+ trans = get_translation (trans, lookup_buf, lookup_buf_end, 1,
+ &from_nchars, &to_nchars);
+ if (EQ (trans, Qt)
+ || buf + to_nchars > buf_end)
+ break;
+ *buf++ = *lookup_buf;
+ for (i = 1; i < to_nchars; i++)
+ *buf++ = XINT (AREF (trans, i));
+ for (i = 1; i < from_nchars; i++, pos++)
+ src += MULTIBYTE_LENGTH_NO_CHECK (src);
+ }
}
coding->consumed = src - coding->source;
struct coding_system *coding;
{
Lisp_Object attrs;
+ Lisp_Object translation_table;
+ int max_lookup;
attrs = CODING_ID_ATTRS (coding->id);
+ if (coding->encoder == encode_coding_raw_text)
+ translation_table = Qnil, max_lookup = 0;
+ else
+ translation_table = get_translation_table (attrs, 1, &max_lookup);
if (BUFFERP (coding->dst_object))
{
coding->consumed = coding->consumed_char = 0;
coding->produced = coding->produced_char = 0;
- coding->result = CODING_RESULT_SUCCESS;
+ record_conversion_result (coding, CODING_RESULT_SUCCESS);
coding->errors = 0;
ALLOC_CONVERSION_WORK_AREA (coding);
do {
coding_set_source (coding);
- consume_chars (coding);
-
- if (!NILP (CODING_ATTR_ENCODE_TBL (attrs)))
- translate_chars (coding, CODING_ATTR_ENCODE_TBL (attrs));
- else if (!NILP (Vstandard_translation_table_for_encode))
- translate_chars (coding, Vstandard_translation_table_for_encode);
-
+ consume_chars (coding, translation_table, max_lookup);
coding_set_destination (coding);
(*(coding->encoder)) (coding);
} while (coding->consumed_char < coding->src_chars);
}
-/* Stack of working buffers used in code conversion. An nil element
- means that the code conversion of that level is not using a working
- buffer. */
-Lisp_Object Vcode_conversion_work_buf_list;
+/* Name (or base name) of work buffer for code conversion. */
+static Lisp_Object Vcode_conversion_workbuf_name;
-/* A working buffer used by the top level conversion. */
-Lisp_Object Vcode_conversion_reused_work_buf;
+/* A working buffer used by the top level conversion. Once it is
+ created, it is never destroyed. It has the name
+ Vcode_conversion_workbuf_name. The other working buffers are
+ destroyed after the use is finished, and their names are modified
+ versions of Vcode_conversion_workbuf_name. */
+static Lisp_Object Vcode_conversion_reused_workbuf;
+/* 1 iff Vcode_conversion_reused_workbuf is already in use. */
+static int reused_workbuf_in_use;
-/* Return a working buffer that can be freely used by the following
- code conversion. MULTIBYTEP specifies the multibyteness of the
- buffer. */
-Lisp_Object
-make_conversion_work_buffer (multibytep, depth)
- int multibytep, depth;
+/* Return a working buffer for code conversion. MULTIBYTE specifies
+ the multibyteness of the returned buffer.
+
+ The buffer Vcode_conversion_reused_workbuf is returned if it is not
+ already in use; it is created (or created again, if it has been
+ killed) on demand and marked as in use. Otherwise a fresh buffer
+ with a name generated from Vcode_conversion_workbuf_name is
+ returned. In both cases the buffer is erased and its undo
+ recording is disabled; the current buffer is left unchanged. */
+
+static Lisp_Object
+make_conversion_work_buffer (multibyte)
+ int multibyte;
{
- struct buffer *current = current_buffer;
- Lisp_Object buf, name;
+ Lisp_Object name, workbuf;
+ struct buffer *current;
- if (depth == 0)
+ if (reused_workbuf_in_use)
{
- if (NILP (Vcode_conversion_reused_work_buf))
- Vcode_conversion_reused_work_buf
- = Fget_buffer_create (build_string (" *code-converting-work<0>*"));
- buf = Vcode_conversion_reused_work_buf;
+ name = Fgenerate_new_buffer_name (Vcode_conversion_workbuf_name, Qnil);
+ workbuf = Fget_buffer_create (name);
}
else
{
- if (depth < 0)
- {
- name = build_string (" *code-converting-work*");
- name = Fgenerate_new_buffer_name (name, Qnil);
- }
- else
- {
- char str[128];
-
- sprintf (str, " *code-converting-work*<%d>", depth);
- name = build_string (str);
- }
- buf = Fget_buffer_create (name);
+ /* Keep the flag a plain 0/1 value, and make sure the cached
+ buffer is the one handed out, so that
+ code_conversion_restore recognizes it and clears the flag
+ even if the previous buffer was killed in the meantime. */
+ reused_workbuf_in_use = 1;
+ if (NILP (Fbuffer_live_p (Vcode_conversion_reused_workbuf)))
+ Vcode_conversion_reused_workbuf
+ = Fget_buffer_create (Vcode_conversion_workbuf_name);
+ workbuf = Vcode_conversion_reused_workbuf;
}
- set_buffer_internal (XBUFFER (buf));
+ current = current_buffer;
+ set_buffer_internal (XBUFFER (workbuf));
+ Ferase_buffer ();
current_buffer->undo_list = Qt;
- Ferase_buffer ();
- Fset_buffer_multibyte (multibytep ? Qt : Qnil);
+ current_buffer->enable_multibyte_characters = multibyte ? Qt : Qnil;
set_buffer_internal (current);
- return buf;
+ return workbuf;
}
+/* Unwind function for code_conversion_save. ARG is a cons
+ (CURRENT . WORKBUF). If WORKBUF is non-nil, release it: the
+ reusable work buffer is only marked as no longer in use, any other
+ live work buffer is killed. Then make CURRENT the current buffer
+ again. Always return Qnil. */
static Lisp_Object
-code_conversion_restore (buffer)
- Lisp_Object buffer;
+code_conversion_restore (arg)
+ Lisp_Object arg;
{
- Lisp_Object workbuf;
-
- workbuf = XCAR (Vcode_conversion_work_buf_list);
- if (! NILP (workbuf)
- && ! EQ (workbuf, Vcode_conversion_reused_work_buf)
- && ! NILP (Fbuffer_live_p (workbuf)))
- Fkill_buffer (workbuf);
- Vcode_conversion_work_buf_list = XCDR (Vcode_conversion_work_buf_list);
- set_buffer_internal (XBUFFER (buffer));
+ Lisp_Object current, workbuf;
+
+ current = XCAR (arg);
+ workbuf = XCDR (arg);
+ if (! NILP (workbuf))
+ {
+ if (EQ (workbuf, Vcode_conversion_reused_workbuf))
+ reused_workbuf_in_use = 0; /* Keep the buffer itself for the next conversion. */
+ else if (! NILP (Fbuffer_live_p (workbuf)))
+ Fkill_buffer (workbuf);
+ }
+ set_buffer_internal (XBUFFER (current)); /* NOTE(review): CURRENT is not checked for liveness here. */
return Qnil;
}
-static Lisp_Object
-code_conversion_save (buffer, with_work_buf, multibyte)
- Lisp_Object buffer;
+Lisp_Object
+code_conversion_save (with_work_buf, multibyte)
int with_work_buf, multibyte;
{
- Lisp_Object workbuf;
+ Lisp_Object workbuf = Qnil; /* Returned as nil when WITH_WORK_BUF is zero. */
if (with_work_buf)
- {
- int depth = XINT (Flength (Vcode_conversion_work_buf_list));
-
- workbuf = make_conversion_work_buffer (multibyte, depth);
- }
- else
- workbuf = Qnil;
- Vcode_conversion_work_buf_list
- = Fcons (workbuf, Vcode_conversion_work_buf_list);
- record_unwind_protect (code_conversion_restore, buffer);
+ workbuf = make_conversion_work_buffer (multibyte); /* MULTIBYTE selects the multibyteness of the work buffer. */
+ record_unwind_protect (code_conversion_restore,
+ Fcons (Fcurrent_buffer (), workbuf)); /* On unwind, code_conversion_restore gets (CURRENT-BUFFER . WORKBUF). */
return workbuf;
}
{
int count = specpdl_ptr - specpdl;
Lisp_Object attrs;
- Lisp_Object buffer;
- buffer = Fcurrent_buffer ();
- code_conversion_save (buffer, 0, 0);
+ code_conversion_save (0, 0);
- coding->src_object = buffer;
+ coding->src_object = Fcurrent_buffer ();
coding->src_chars = chars;
coding->src_bytes = bytes;
coding->src_pos = -chars;
coding->src_pos_byte = -bytes;
coding->src_multibyte = chars < bytes;
- coding->dst_object = buffer;
+ coding->dst_object = coding->src_object;
coding->dst_pos = PT;
coding->dst_pos_byte = PT_BYTE;
coding->dst_multibyte = ! NILP (current_buffer->enable_multibyte_characters);
EMACS_INT chars, bytes;
{
int count = specpdl_ptr - specpdl;
- Lisp_Object buffer;
- buffer = Fcurrent_buffer ();
- code_conversion_save (buffer, 0, 0);
+ code_conversion_save (0, 0);
- coding->src_object = buffer;
+ coding->src_object = Fcurrent_buffer ();
coding->src_chars = chars;
coding->src_bytes = bytes;
coding->src_pos = -chars;
|| (! NILP (CODING_ATTR_POST_READ (attrs))
&& NILP (dst_object)))
{
- coding->dst_object = code_conversion_save (buffer, 1, 1);
+ coding->dst_object = code_conversion_save (1, 1);
coding->dst_pos = BEG;
coding->dst_pos_byte = BEG_BYTE;
coding->dst_multibyte = 1;
}
else if (BUFFERP (dst_object))
{
- code_conversion_save (buffer, 0, 0);
+ code_conversion_save (0, 0);
coding->dst_object = dst_object;
coding->dst_pos = BUF_PT (XBUFFER (dst_object));
coding->dst_pos_byte = BUF_PT_BYTE (XBUFFER (dst_object));
}
else
{
- code_conversion_save (buffer, 0, 0);
+ code_conversion_save (0, 0);
coding->dst_object = Qnil;
coding->dst_multibyte = 1;
}
= (unsigned char *) xrealloc (destination, coding->produced);
if (! destination)
{
- coding->result = CODING_RESULT_INSUFFICIENT_DST;
+ record_conversion_result (coding,
+ CODING_RESULT_INSUFFICIENT_DST);
unbind_to (count, Qnil);
return;
}
saved_pt_byte + (coding->produced - bytes));
}
- unbind_to (count, Qnil);
+ unbind_to (count, coding->dst_object);
}
if (! NILP (CODING_ATTR_PRE_WRITE (attrs)))
{
- coding->src_object = code_conversion_save (buffer, 1,
- coding->src_multibyte);
+ coding->src_object = code_conversion_save (1, coding->src_multibyte);
set_buffer_internal (XBUFFER (coding->src_object));
if (STRINGP (src_object))
insert_from_string (src_object, from, from_byte, chars, bytes, 0);
}
else if (STRINGP (src_object))
{
- code_conversion_save (buffer, 0, 0);
+ code_conversion_save (0, 0);
coding->src_pos = from;
coding->src_pos_byte = from_byte;
}
else if (BUFFERP (src_object))
{
- code_conversion_save (buffer, 0, 0);
+ code_conversion_save (0, 0);
set_buffer_internal (XBUFFER (src_object));
if (EQ (src_object, dst_object))
{
}
}
else
- code_conversion_save (buffer, 0, 0);
+ code_conversion_save (0, 0);
if (BUFFERP (dst_object))
{
DEFUN ("check-coding-system", Fcheck_coding_system, Scheck_coding_system,
1, 1, 0,
doc: /* Check validity of CODING-SYSTEM.
-If valid, return CODING-SYSTEM, else signal a `coding-system-error' error. */)
+If valid, return CODING-SYSTEM, else signal a `coding-system-error' error.
+It is valid if it is nil or a symbol defined as a coding system by the
+function `define-coding-system'. */)
(coding_system)
Lisp_Object coding_system;
{
detect only text-format. */
Lisp_Object
-detect_coding_system (src, src_bytes, highest, multibytep, coding_system)
+detect_coding_system (src, src_chars, src_bytes, highest, multibytep,
+ coding_system)
const unsigned char *src;
- int src_bytes, highest;
+ int src_chars, src_bytes, highest;
int multibytep;
Lisp_Object coding_system;
{
struct coding_system coding;
int id;
struct coding_detection_info detect_info;
+ enum coding_category base_category;
if (NILP (coding_system))
coding_system = Qundecided;
coding_system = CODING_ATTR_BASE_NAME (attrs);
coding.source = src;
+ coding.src_chars = src_chars;
coding.src_bytes = src_bytes;
coding.src_multibyte = multibytep;
coding.consumed = 0;
detect_info.checked = detect_info.found = detect_info.rejected = 0;
/* At first, detect text-format if necessary. */
- if (XINT (CODING_ATTR_CATEGORY (attrs)) == coding_category_undecided)
+ base_category = XINT (CODING_ATTR_CATEGORY (attrs));
+ if (base_category == coding_category_undecided)
{
enum coding_category category;
struct coding_system *this;
int c, i;
- for (; src < src_end; src++)
+ /* Skip all ASCII bytes except for a few ISO2022 controls. */
+ for (i = 0; src < src_end; i++, src++)
{
c = *src;
- if (c & 0x80
- || (c < 0x20 && (c == ISO_CODE_ESC
- || c == ISO_CODE_SI
- || c == ISO_CODE_SO)))
+ if (c & 0x80)
break;
+ if (c < 0x20
+ && (c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO)
+ && inhibit_iso_escape_detection)
+ {
+ coding.head_ascii = src - coding.source;
+ if (detect_coding_iso_2022 (&coding, &detect_info))
+ {
+ /* We have scanned the whole data. */
+ if (! (detect_info.rejected & CATEGORY_MASK_ISO_7_ELSE))
+ /* We didn't find an 8-bit code. */
+ src = src_end;
+ break;
+ }
+ }
}
coding.head_ascii = src - coding.source;
- if (src < src_end)
- for (i = 0; i < coding_category_raw_text; i++)
- {
- category = coding_priorities[i];
- this = coding_categories + category;
-
- if (this->id < 0)
- {
- /* No coding system of this category is defined. */
- detect_info.rejected |= (1 << category);
- }
- else if (category >= coding_category_raw_text)
- continue;
- else if (detect_info.checked & (1 << category))
+ if (src < src_end
+ || detect_info.found)
+ {
+ if (src == src_end)
+ /* As all bytes are 7-bit, we can ignore non-ISO-2022 codings. */
+ for (i = 0; i < coding_category_raw_text; i++)
{
- if (highest
- && (detect_info.found & (1 << category)))
+ category = coding_priorities[i];
+ if (detect_info.found & (1 << category))
break;
}
- else
+ else
+ for (i = 0; i < coding_category_raw_text; i++)
{
- if ((*(this->detector)) (&coding, &detect_info)
- && highest
- && (detect_info.found & (1 << category)))
- break;
- }
- }
+ category = coding_priorities[i];
+ this = coding_categories + category;
+ if (this->id < 0)
+ {
+ /* No coding system of this category is defined. */
+ detect_info.rejected |= (1 << category);
+ }
+ else if (category >= coding_category_raw_text)
+ continue;
+ else if (detect_info.checked & (1 << category))
+ {
+ if (highest
+ && (detect_info.found & (1 << category)))
+ break;
+ }
+ else
+ {
+ if ((*(this->detector)) (&coding, &detect_info)
+ && highest
+ && (detect_info.found & (1 << category)))
+ {
+ if (category == coding_category_utf_16_auto)
+ {
+ if (detect_info.found & CATEGORY_MASK_UTF_16_LE)
+ category = coding_category_utf_16_le;
+ else
+ category = coding_category_utf_16_be;
+ }
+ break;
+ }
+ }
+ }
+ }
if (detect_info.rejected == CATEGORY_MASK_ANY)
{
detect_info.found |= found;
}
}
+ else if (base_category == coding_category_utf_16_auto)
+ {
+ if (detect_coding_utf_16 (&coding, &detect_info))
+ {
+ struct coding_system *this;
+
+ if (detect_info.found & CATEGORY_MASK_UTF_16_LE)
+ this = coding_categories + coding_category_utf_16_le;
+ else if (detect_info.found & CATEGORY_MASK_UTF_16_BE)
+ this = coding_categories + coding_category_utf_16_be;
+ else if (detect_info.rejected & CATEGORY_MASK_UTF_16_LE_NOSIG)
+ this = coding_categories + coding_category_utf_16_be_nosig;
+ else
+ this = coding_categories + coding_category_utf_16_le_nosig;
+ val = Fcons (make_number (this->id), Qnil);
+ }
+ }
else
{
detect_info.found = 1 << XINT (CODING_ATTR_CATEGORY (attrs));
move_gap_both (to, to_byte);
return detect_coding_system (BYTE_POS_ADDR (from_byte),
- to_byte - from_byte,
+ to - from, to_byte - from_byte,
!NILP (highest),
!NILP (current_buffer
->enable_multibyte_characters),
{
CHECK_STRING (string);
- return detect_coding_system (SDATA (string), SBYTES (string),
+ return detect_coding_system (SDATA (string),
+ SCHARS (string), SBYTES (string),
!NILP (highest), STRING_MULTIBYTE (string),
Qnil);
}
{
Lisp_Object tail;
struct charset *charset;
+ Lisp_Object translation_table;
+ translation_table = CODING_ATTR_TRANS_TBL (attrs);
+ if (! NILP (translation_table))
+ c = translate_char (translation_table, c);
for (tail = CODING_ATTR_CHARSET_LIST (attrs);
CONSP (tail); tail = XCDR (tail))
{
attrs = AREF (CODING_SYSTEM_SPEC (XCAR (tail)), 0);
if (EQ (XCAR (tail), CODING_ATTR_BASE_NAME (attrs))
&& ! EQ (CODING_ATTR_TYPE (attrs), Qundecided))
- coding_attrs_list = Fcons (attrs, coding_attrs_list);
+ {
+ ASET (attrs, coding_attr_trans_tbl,
+ get_translation_table (attrs, 1, NULL));
+ coding_attrs_list = Fcons (attrs, coding_attrs_list);
+ }
}
if (STRINGP (start))
}
}
- safe_codings = Qnil;
+ safe_codings = list2 (Qraw_text, Qno_conversion);
for (tail = coding_attrs_list; CONSP (tail); tail = XCDR (tail))
if (! NILP (XCAR (tail)))
safe_codings = Fcons (CODING_ATTR_BASE_NAME (XCAR (tail)), safe_codings);
{
int n;
struct coding_system coding;
- Lisp_Object attrs, charset_list;
+ Lisp_Object attrs, charset_list, translation_table;
Lisp_Object positions;
int from, to;
const unsigned char *p, *stop, *pend;
return Qnil;
ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
charset_list = CODING_ATTR_CHARSET_LIST (attrs);
+ translation_table = get_translation_table (attrs, 1, NULL);
if (NILP (string))
{
c = STRING_CHAR_ADVANCE (p);
if (! (ASCII_CHAR_P (c) && ascii_compatible)
- && ! char_charset (c, charset_list, NULL))
+ && ! char_charset (translate_char (translation_table, c),
+ charset_list, NULL))
{
positions = Fcons (make_number (from), positions);
n--;
int pos;
const unsigned char *p, *pbeg, *pend;
int c;
- Lisp_Object tail, elt;
+ Lisp_Object tail, elt, attrs;
if (STRINGP (start))
{
for (tail = coding_system_list; CONSP (tail); tail = XCDR (tail))
{
elt = XCAR (tail);
- list = Fcons (Fcons (elt, Fcons (AREF (CODING_SYSTEM_SPEC (elt), 0),
- Qnil)),
- list);
+ attrs = AREF (CODING_SYSTEM_SPEC (elt), 0);
+ ASET (attrs, coding_attr_trans_tbl,
+ get_translation_table (attrs, 1, NULL));
+ list = Fcons (Fcons (elt, Fcons (attrs, Qnil)), list);
}
if (STRINGP (start))
}
-
Lisp_Object
code_convert_region (start, end, coding_system, dst_object, encodep, norecord)
Lisp_Object start, end, coding_system, dst_object;
if (! norecord)
Vlast_coding_system_used = CODING_ID_NAME (coding.id);
- if (coding.result != CODING_RESULT_SUCCESS)
- error ("Code conversion error: %d", coding.result);
-
return (BUFFERP (dst_object)
? make_number (coding.produced_char)
: coding.dst_object);
if (! norecord)
Vlast_coding_system_used = CODING_ID_NAME (coding.id);
- if (coding.result != CODING_RESULT_SUCCESS)
- error ("Code conversion error: %d", coding.result);
-
return (BUFFERP (dst_object)
? make_number (coding.produced_char)
: coding.dst_object);
CODING_ATTR_ASCII_COMPAT (attrs) = args[coding_arg_ascii_compatible_p];
val = args[coding_arg_decode_translation_table];
- if (! NILP (val))
- CHECK_CHAR_TABLE (val);
+ if (! CHAR_TABLE_P (val) && ! CONSP (val))
+ CHECK_SYMBOL (val);
CODING_ATTR_DECODE_TBL (attrs) = val;
val = args[coding_arg_encode_translation_table];
- if (! NILP (val))
- CHECK_CHAR_TABLE (val);
+ if (! CHAR_TABLE_P (val) && ! CONSP (val))
+ CHECK_SYMBOL (val);
CODING_ATTR_ENCODE_TBL (attrs) = val;
val = args[coding_arg_post_read_conversion];
if (EQ (coding_type, Qcharset))
{
- Lisp_Object list;
/* Generate a lisp vector of 256 elements. Each element is nil,
integer, or a list of charset IDs.
If Nth element is a list of charset IDs, N is the first byte
of one of them. The list is sorted by dimensions of the
- charsets. A charset of smaller dimension comes firtst.
- */
- for (list = Qnil, tail = charset_list; CONSP (tail); tail = XCDR (tail))
- {
- struct charset *charset = CHARSET_FROM_ID (XFASTINT (XCAR (tail)));
-
- if (charset->method == CHARSET_METHOD_SUPERSET)
- {
- val = CHARSET_SUPERSET (charset);
- for (; CONSP (val); val = XCDR (val))
- list = Fcons (XCAR (XCAR (val)), list);
- }
- else
- list = Fcons (XCAR (tail), list);
- }
-
+ charsets. A charset of smaller dimension comes first. */
val = Fmake_vector (make_number (256), Qnil);
- for (tail = Fnreverse (list); CONSP (tail); tail = XCDR (tail))
+ for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
{
struct charset *charset = CHARSET_FROM_ID (XFASTINT (XCAR (tail)));
int dim = CHARSET_DIMENSION (charset);
struct charset *charset;
- if (XINT (Flength (charset_list)) != 3)
- error ("There should be just three charsets");
+ if (XINT (Flength (charset_list)) != 3
+ && XINT (Flength (charset_list)) != 4)
+ error ("There should be three or four charsets");
charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
if (CHARSET_DIMENSION (charset) != 1)
error ("Dimension of charset %s is not two",
SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
+ charset_list = XCDR (charset_list);
+ if (! NILP (charset_list))
+ {
+ charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
+ if (CHARSET_DIMENSION (charset) != 2)
+ error ("Dimension of charset %s is not two",
+ SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
+ }
+
category = coding_category_sjis;
Vsjis_coding_system = name;
}
make_number (nargs)));
}
-/* Fixme: should this record the alias relationships for
- diagnostics? Should it update coding-system-list? */
+
+DEFUN ("coding-system-put", Fcoding_system_put, Scoding_system_put,
+ 3, 3, 0,
+ doc: /* Change value in CODING-SYSTEM's property list PROP to VAL. */)
+ (coding_system, prop, val)
+ Lisp_Object coding_system, prop, val;
+{
+ Lisp_Object spec, attrs;
+ /* For each PROP recognized below, VAL is type-checked and also
+ stored through the matching CODING_ATTR_* accessor of ATTRS.
+ Whatever PROP is, VAL is then recorded in the property list of
+ ATTRS and returned. */
+ CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec); /* presumably validates CODING_SYSTEM and sets SPEC -- confirm against the macro */
+ attrs = AREF (spec, 0); /* ATTRS is element 0 of SPEC. */
+ if (EQ (prop, QCmnemonic))
+ { /* VAL must be a string or a character. */
+ if (! STRINGP (val))
+ CHECK_CHARACTER (val);
+ CODING_ATTR_MNEMONIC (attrs) = val;
+ }
+ else if (EQ (prop, QCdefalut_char))
+ { /* QCdefalut_char (spelled as declared) is the symbol
+ `:default-char'. A nil VAL is replaced by the space character;
+ any other VAL must be a character. */
+ if (NILP (val))
+ val = make_number (' ');
+ else
+ CHECK_CHARACTER (val);
+ CODING_ATTR_DEFAULT_CHAR (attrs) = val;
+ }
+ else if (EQ (prop, QCdecode_translation_table))
+ { /* VAL may be a char-table or a cons; anything else must be a
+ symbol. */
+ if (! CHAR_TABLE_P (val) && ! CONSP (val))
+ CHECK_SYMBOL (val);
+ CODING_ATTR_DECODE_TBL (attrs) = val;
+ }
+ else if (EQ (prop, QCencode_translation_table))
+ { /* Same check as for the decode translation table. */
+ if (! CHAR_TABLE_P (val) && ! CONSP (val))
+ CHECK_SYMBOL (val);
+ CODING_ATTR_ENCODE_TBL (attrs) = val;
+ }
+ else if (EQ (prop, QCpost_read_conversion))
+ { /* VAL must be a symbol. */
+ CHECK_SYMBOL (val);
+ CODING_ATTR_POST_READ (attrs) = val;
+ }
+ else if (EQ (prop, QCpre_write_conversion))
+ { /* VAL must be a symbol. */
+ CHECK_SYMBOL (val);
+ CODING_ATTR_PRE_WRITE (attrs) = val;
+ }
+ /* Done for every PROP, including ones not matched above. VAL here
+ is the possibly substituted value (space for a nil
+ `:default-char'), so that is what gets stored and returned. */
+ CODING_ATTR_PLIST (attrs)
+ = Fplist_put (CODING_ATTR_PLIST (attrs), prop, val);
+ return val;
+}
+
+
DEFUN ("define-coding-system-alias", Fdefine_coding_system_alias,
Sdefine_coding_system_alias, 2, 2, 0,
doc: /* Define ALIAS as an alias for CODING-SYSTEM. */)
CHECK_SYMBOL (alias);
CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
aliases = AREF (spec, 1);
+ /* ALIASES should be a non-empty list whose first element is the
+ base coding system. Append ALIAS at the tail of the
+ list. */
while (!NILP (XCDR (aliases)))
aliases = XCDR (aliases);
XSETCDR (aliases, Fcons (alias, Qnil));
for (i = 0; i < 3; i++)
Fdefine_coding_system_alias (AREF (subsidiaries, i),
AREF (eol_type, i));
-
- ASET (spec, 2, subsidiaries);
}
Fputhash (alias, spec, Vcoding_system_hash_table);
+ Vcoding_system_list = Fcons (alias, Vcoding_system_list);
Vcoding_system_alist = Fcons (Fcons (Fsymbol_name (alias), Qnil),
Vcoding_system_alist);
iso_code_class[i] = ISO_graphic_plane_1;
iso_code_class[0x20] = iso_code_class[0x7F] = ISO_0x20_or_0x7F;
iso_code_class[0xA0] = iso_code_class[0xFF] = ISO_0xA0_or_0xFF;
- iso_code_class[ISO_CODE_CR] = ISO_carriage_return;
iso_code_class[ISO_CODE_SO] = ISO_shift_out;
iso_code_class[ISO_CODE_SI] = ISO_shift_in;
iso_code_class[ISO_CODE_SS2_7] = ISO_single_shift_2_7;
iso_code_class[ISO_CODE_SS3] = ISO_single_shift_3;
iso_code_class[ISO_CODE_CSI] = ISO_control_sequence_introducer;
- inhibit_pre_post_conversion = 0;
-
for (i = 0; i < 256; i++)
{
emacs_mule_bytes[i] = 1;
staticpro (&Vbig5_coding_system);
Vbig5_coding_system = Qnil;
- staticpro (&Vcode_conversion_work_buf_list);
- Vcode_conversion_work_buf_list = Qnil;
+ staticpro (&Vcode_conversion_reused_workbuf);
+ Vcode_conversion_reused_workbuf = Qnil;
+
+ staticpro (&Vcode_conversion_workbuf_name);
+ Vcode_conversion_workbuf_name = build_string (" *code-conversion-work*");
- staticpro (&Vcode_conversion_reused_work_buf);
- Vcode_conversion_reused_work_buf = Qnil;
+ reused_workbuf_in_use = 0;
DEFSYM (Qcharset, "charset");
DEFSYM (Qtarget_idx, "target-idx");
Qchar_table_extra_slots = intern ("char-table-extra-slots");
DEFSYM (Qtranslation_table, "translation-table");
- Fput (Qtranslation_table, Qchar_table_extra_slots, make_number (1));
+ Fput (Qtranslation_table, Qchar_table_extra_slots, make_number (2));
DEFSYM (Qtranslation_table_id, "translation-table-id");
DEFSYM (Qtranslation_table_for_decode, "translation-table-for-decode");
DEFSYM (Qtranslation_table_for_encode, "translation-table-for-encode");
DEFSYM (Qemacs_mule, "emacs-mule");
DEFSYM (QCcategory, ":category");
+ DEFSYM (QCmnemonic, ":mnemonic");
+ DEFSYM (QCdefalut_char, ":default-char");
+ DEFSYM (QCdecode_translation_table, ":decode-translation-table");
+ DEFSYM (QCencode_translation_table, ":encode-translation-table");
+ DEFSYM (QCpost_read_conversion, ":post-read-conversion");
+ DEFSYM (QCpre_write_conversion, ":pre-write-conversion");
Vcoding_category_table
= Fmake_vector (make_number (coding_category_max), Qnil);
ASET (Vcoding_category_table, coding_category_undecided,
intern ("coding-category-undecided"));
+ DEFSYM (Qinsufficient_source, "insufficient-source");
+ DEFSYM (Qinconsistent_eol, "inconsistent-eol");
+ DEFSYM (Qinvalid_source, "invalid-source");
+ DEFSYM (Qinterrupted, "interrupted");
+ DEFSYM (Qinsufficient_memory, "insufficient-memory");
+
defsubr (&Scoding_system_p);
defsubr (&Sread_coding_system);
defsubr (&Sread_non_nil_coding_system);
defsubr (&Sset_coding_system_priority);
defsubr (&Sdefine_coding_system_internal);
defsubr (&Sdefine_coding_system_alias);
+ defsubr (&Scoding_system_put);
defsubr (&Scoding_system_base);
defsubr (&Scoding_system_plist);
defsubr (&Scoding_system_aliases);
Coding system used in the latest file or process I/O. */);
Vlast_coding_system_used = Qnil;
+ DEFVAR_LISP ("last-code-conversion-error", &Vlast_code_conversion_error,
+ doc: /*
+Error status of the last code conversion.
+
+When an error was detected in the last code conversion, this variable
+is set to one of the following symbols.
+ `insufficient-source'
+ `inconsistent-eol'
+ `invalid-source'
+ `interrupted'
+ `insufficient-memory'
+When no error was detected, the value doesn't change. So, to check
+the error status of a code conversion by this variable, you must
+explicitly set this variable to nil before performing code
+conversion. */);
+ Vlast_code_conversion_error = Qnil;
+
DEFVAR_BOOL ("inhibit-eol-conversion", &inhibit_eol_conversion,
doc: /*
*Non-nil means always inhibit code conversion of end-of-line format.
}
#endif /* emacs */
+
+/* arch-tag: 3a3a2b01-5ff6-4071-9afe-f5b808d9229d
+ (do not change this comment) */